Date: (Tue) Apr 28, 2015
Data: Source: Training: https://courses.edx.org/c4x/MITx/15.071x_2/asset/emails.csv
New:
Time period:
Based on analysis utilizing <> techniques,
Use plot.ly for interactive plots ?
varImp for randomForest crashes in caret version:6.0.41 -> submit bug report
extensions toward multiclass classification are scheduled for the next release
glm_dmy_mdl should use the same method as glm_sel_mdl until a custom dummy classifier is implemented
# Session setup: clear the workspace, fix the RNG seed, set string handling
# and load the project helper scripts.
# NOTE(review): rm(list=ls()) removes every (non-hidden) object in the current
# environment; running the script in a fresh R session is a safer alternative.
rm(list=ls())
# Fixed seed so that random steps are repeatable from run to run
set.seed(12345)
# Keep character columns as character when data frames are created
options(stringsAsFactors=FALSE)
# Project helper scripts (contents not visible here); presumably they define
# myimport_data, myprint_df, petrinet and ggplot.petrinet used below -- TODO confirm
source("~/Dropbox/datascience/R/mydsutils.R")
source("~/Dropbox/datascience/R/myplot.R")
source("~/Dropbox/datascience/R/mypetrinet.R")
# Gather all package requirements here
#suppressPackageStartupMessages(require())
#packageVersion("snow")
#require(sos); findFn("pinv", maxPages=2, sortby="MaxScore")
# Analysis control global variables

# Where the data comes from
glb_trnng_url <- "https://courses.edx.org/c4x/MITx/15.071x_2/asset/emails.csv"
glb_newdt_url <- "<newdt_url>"

# Is the new-data set imported separately (TRUE) or carved out of the
# training import (FALSE)?
glb_is_separate_newent_dataset <- FALSE
# Split the single import into training / new data sets? TRUE or FALSE
glb_split_entity_newent_datasets <- TRUE

# How the split is made: "condition" or "sample" or "copy"
glb_split_newdata_method <- "sample"
# Used by method "condition"; or NULL
glb_split_newdata_condition <- "<col_name> <condition_operator> <value>"
# Used by method "sample"; must be > 0 & < 1
glb_split_newdata_size_ratio <- 0.3
# Seed set right before sampling; any integer
glb_split_sample.seed <- 123

# NULL or any integer
glb_max_trnent_obs <- NULL
# NULL or c("<col_name>")
glb_drop_vars <- NULL

# Problem type
glb_is_regression <- FALSE
glb_is_classification <- TRUE
glb_is_binomial <- TRUE

# Response column as it appears in the raw data
glb_rsp_var_raw <- "spam"
# for classification, the response variable has to be a factor
glb_rsp_var <- "spam.fctr"
# if the response factor is based on numbers e.g (0/1 vs. "A"/"B"),
# caret predict(..., type="prob") crashes
# Map the raw response to a two-level factor: values equal to 1 become "Y",
# all other non-missing values become "N"; missing values stay NA.
#
# @param raw Vector of raw response values (0/1 in this data set).
# @return Factor of the same length as `raw` with levels c("N", "Y"),
#   "N" being the reference (first) level.
glb_map_rsp_raw_to_var <- function(raw) {
  # Both levels are declared explicitly so the result always carries "N" and
  # "Y" in that order, even when `raw` holds only one of the two classes
  # (relevel(..., ref="N") fails when "N" does not occur in the data).
  factor(ifelse(raw == 1, "Y", "N"), levels = c("N", "Y"))
  #as.factor(paste0("B", raw))
  #as.factor(raw)
}
glb_map_rsp_raw_to_var(c(1, 1, 0, 0, 0))
## [1] Y Y N N N
## Levels: N Y
# Convert the response variable back to raw values by subtracting 1 from its
# numeric codes; for a factor with levels N, Y this yields 0 / 1.
glb_map_rsp_var_to_raw <- function(var) {
  # Alternative mappings for other response encodings:
  #as.numeric(var)
  #levels(var)[as.numeric(var)]
  #c(" <=50K", " >50K")[as.numeric(var)]
  level_codes <- as.numeric(var)
  level_codes - 1
}
glb_map_rsp_var_to_raw(glb_map_rsp_raw_to_var(c(1, 1, 0, 0, 0)))
## [1] 1 1 0 0 0
# Guard: when the modelling response differs from the raw response column, a
# mapping function must be available. `&&` is used because an `if` condition
# is a single TRUE/FALSE; it also short-circuits.
if ((glb_rsp_var != glb_rsp_var_raw) && is.null(glb_map_rsp_raw_to_var)) {
  stop("glb_map_rsp_raw_to_var function expected")
}
# Prefix of the prediction column names
glb_rsp_var_out <- paste0(glb_rsp_var, ".predict.") # model_id is appended later
# Identifier columns: NULL or c("<id_var>")
glb_id_vars <- NULL

# Does the data contain free-text columns? TRUE or FALSE
# (vs. glb_is_numerical ???)
glb_is_textual <- TRUE
#Sys.setlocale("LC_ALL", "C") # For english

# Text columns: c("<col_name>")
glb_txt_vars <- "text"
# Extra stop words: NULL or c("<freq_word>")
glb_append_stop_words <- NULL
# Ideally, numrows(glb_feats_df) << numrows(glb_trnent_df)
glb_sprs_threshold <- 0.95
# Columns that must not be offered to the models as features.
# List transformed vars
glb_exclude_vars_as_features <- NULL # or c("<var_name>")

# The text columns themselves are excluded when the data is textual
if (glb_is_textual) {
  glb_exclude_vars_as_features <-
    union(glb_exclude_vars_as_features, glb_txt_vars)
}

# The raw response is excluded when a separate response variable is used
if (glb_rsp_var_raw != glb_rsp_var) {
  glb_exclude_vars_as_features <-
    union(glb_exclude_vars_as_features, glb_rsp_var_raw)
}

# List features that should be excluded due to known causation by the
# prediction variable
glb_exclude_vars_as_features <-
  union(glb_exclude_vars_as_features, NULL) # or c("<col_name>")

# List output vars (useful during testing in console)
# glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features,
# grep(glb_rsp_var_out, names(glb_trnent_df), value=TRUE))
# Missing-value imputation switch: FALSE or TRUE
glb_impute_na_data <- FALSE
# Seed for the imputation step; any integer
glb_mice_complete.seed <- 144

# rpart: .rnorm messes with the models badly
# caret creates dummy vars for factor feats which messes up the tuning
# - better to feed as.numeric(<feat>.fctr) to caret
# Model methods to try, chosen by problem type:
# regression, binomial classification, or any other classification.
glb_models_method_vctr <-
  if (glb_is_regression) {
    c("lm", "glm", "rpart", "rf")
  } else if (glb_is_binomial) {
    c("glm", "rpart", "rf")
  } else {
    c("rpart", "rf")
  }
# Baseline prediction model feature(s)
glb_Baseline_mdl_var <- NULL # or c("<col_name>")

# Custom model metric settings (all NULL here).
# glb_model_metric_terms is multiplied element-wise with the confusion matrix
# in the summary-function template shown further down.
glb_model_metric_terms <- NULL
# or matrix(c(
# 0,1,2,3,4,
# 2,0,1,2,3,
# 4,2,0,1,2,
# 6,4,2,0,1,
# 8,6,4,2,0
# ), byrow=TRUE, nrow=5)
glb_model_metric <- NULL # or "<metric_name>"
# or FALSE (TRUE is not the default for both classification & regression)
glb_model_metric_maximize <- NULL
glb_model_metric_smmry <- NULL
# or function(data, lev=NULL, model=NULL) {
# confusion_mtrx <- t(as.matrix(confusionMatrix(data$pred, data$obs)))
# #print(confusion_mtrx)
# #print(confusion_mtrx * glb_model_metric_terms)
# metric <- sum(confusion_mtrx * glb_model_metric_terms) / nrow(data)
# names(metric) <- glb_model_metric
# return(metric)
# }

# Tuning ranges, one row per parameter (parameter, min, max, by); or NULL
glb_tune_models_df <- rbind(
  #data.frame(parameter="cp", min=0.00005, max=0.00005, by=0.000005),
  #seq(from=0.01, to=0.01, by=0.01)
  #data.frame(parameter="mtry", min=2, max=4, by=1),
  data.frame(parameter = "dummy", min = 2, max = 4, by = 1)
)

# Number of cross-validation folds; or NULL
glb_n_cv_folds <- 3
# Classification probability threshold; e.g. 0.5
glb_clf_proba_threshold <- NULL
# Model selection criteria, chosen by problem type
if (glb_is_regression) {
  glb_model_evl_criteria <-
    c("min.RMSE.OOB", "max.R.sq.OOB", "max.Adj.R.sq.fit")
}
if (glb_is_classification) {
  # The binomial case adds the AIC of the fit as a third criterion
  glb_model_evl_criteria <-
    if (glb_is_binomial) {
      c("max.Accuracy.OOB", "max.Kappa.OOB", "min.aic.fit")
    } else {
      c("max.Accuracy.OOB", "max.Kappa.OOB")
    }
}

# Selected model id: NULL or "<model_id_prefix>.<model_method>"
glb_sel_mdl_id <- NULL
# Final model id: same as the selected one, or "Final"
glb_fin_mdl_id <- glb_sel_mdl_id
# Output prefix
glb_out_pfx <- "Enron_Spam_"
# Depict process
# Builds a petrinet object describing the analysis workflow from three data
# frames (transitions, places, arcs) and prints its plot.
# NOTE(review): petrinet() and ggplot.petrinet() are not defined in this file;
# presumably they come from the sourced mypetrinet.R -- confirm.
glb_analytics_pn <- petrinet(name="glb_analytics_pn",
# Six transitions: id, name and x / y values (presumably plot coordinates)
trans_df=data.frame(id=1:6,
name=c("data.training.all","data.new",
"model.selected","model.final",
"data.training.all.prediction","data.new.prediction"),
x=c( -5,-5,-15,-25,-25,-35),
y=c( -5, 5, 0, 0, -5, 5)
),
# Four places; M0 is presumably the initial marking (3 for "bgn") -- TODO confirm
places_df=data.frame(id=1:4,
name=c("bgn","fit.data.training.all","predict.data.new","end"),
x=c( -0, -20, -30, -40),
y=c( 0, 0, 0, 0),
M0=c( 3, 0, 0, 0)
),
# Twelve arcs given as parallel begin / end name vectors; each name refers to
# a place or a transition defined above
arcs_df=data.frame(
begin=c("bgn","bgn","bgn",
"data.training.all","model.selected","fit.data.training.all",
"fit.data.training.all","model.final",
"data.new","predict.data.new",
"data.training.all.prediction","data.new.prediction"),
end =c("data.training.all","data.new","model.selected",
"fit.data.training.all","fit.data.training.all","model.final",
"data.training.all.prediction","predict.data.new",
"predict.data.new","data.new.prediction",
"end","end")
))
#print(ggplot.petrinet(glb_analytics_pn))
# Print the plot with its axes flipped via coord_flip()
print(ggplot.petrinet(glb_analytics_pn) + coord_flip())
## Loading required package: grid
# Script bookkeeping: start the timer and open the chunk log with its first
# entry (import_data, step 1.0).
glb_analytics_avl_objs <- NULL
glb_script_tm <- proc.time()
glb_script_df <- data.frame(
  chunk_label = "import_data",
  chunk_step_major = 1,
  chunk_step_minor = 0,
  # seconds since the timer started; the element keeps its "elapsed" name,
  # which becomes the row name of the data frame
  elapsed = (proc.time() - glb_script_tm)["elapsed"]
)
print(tail(glb_script_df, n = 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed import_data 1 0 0.002
1: import data
glb_entity_df <- myimport_data(url=glb_trnng_url,
comment=ifelse(!glb_is_separate_newent_dataset, "glb_entity_df", "glb_trnent_df"),
force_header=TRUE)
## [1] "Reading file ./data/emails.csv..."
## [1] "dimensions of data in ./data/emails.csv: 5,728 rows x 2 cols"
## text
## 1 Subject: naturally irresistible your corporate identity lt is really hard to recollect a company : the market is full of suqgestions and the information isoverwhelminq ; but a good catchy logo , stylish statlonery and outstanding website will make the task much easier . we do not promise that havinq ordered a iogo your company will automaticaily become a world ieader : it isguite ciear that without good products , effective business organization and practicable aim it will be hotat nowadays market ; but we do promise that your marketing efforts will become much more effective . here is the list of clear benefits : creativeness : hand - made , original logos , specially done to reflect your distinctive company image . convenience : logo and stationery are provided in all formats ; easy - to - use content management system letsyou change your website content and even its structure . promptness : you will see logo drafts within three business days . affordability : your marketing break - through shouldn ' t make gaps in your budget . 100 % satisfaction guaranteed : we provide unlimited amount of changes with no extra fees for you to be surethat you will love the result of this collaboration . have a look at our portfolio _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ not interested . . . _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
## 2 Subject: the stock trading gunslinger fanny is merrill but muzo not colza attainder and penultimate like esmark perspicuous ramble is segovia not group try slung kansas tanzania yes chameleon or continuant clothesman no libretto is chesapeake but tight not waterway herald and hawthorn like chisel morristown superior is deoxyribonucleic not clockwork try hall incredible mcdougall yes hepburn or einsteinian earmark no sapling is boar but duane not plain palfrey and inflexible like huzzah pepperoni bedtime is nameable not attire try edt chronography optima yes pirogue or diffusion albeit no
## 3 Subject: unbelievable new homes made easy im wanting to show you this homeowner you have been pre - approved for a $ 454 , 169 home loan at a 3 . 72 fixed rate . this offer is being extended to you unconditionally and your credit is in no way a factor . to take advantage of this limited time opportunity all we ask is that you visit our website and complete the 1 minute post approval form look foward to hearing from you , dorcas pittman
## 4 Subject: 4 color printing special request additional information now ! click here click here for a printable version of our order form ( pdf format ) phone : ( 626 ) 338 - 8090 fax : ( 626 ) 338 - 8102 e - mail : ramsey @ goldengraphix . com request additional information now ! click here click here for a printable version of our order form ( pdf format ) golden graphix & printing 5110 azusa canyon rd . irwindale , ca 91706 this e - mail message is an advertisement and / or solicitation .
## 5 Subject: do not have money , get software cds from here ! software compatibility . . . . ain ' t it great ? grow old along with me the best is yet to be . all tradgedies are finish ' d by death . all comedies are ended by marriage .
## 6 Subject: great nnews hello , welcome to medzonline sh groundsel op we are pleased to introduce ourselves as one of the ieading online phar felicitation maceuticai shops . helter v shakedown r a cosmopolitan l l blister l l bestow ag ac tosher l is coadjutor va confidant um andmanyother . - sav inexpiable e over 75 % - total confide leisure ntiaiity - worldwide s polite hlpplng - ov allusion er 5 miilion customers in 150 countries have devitalize a nice day !
## spam
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## text
## 198 Subject: please kindly assist greetings , i am prince fayad w . bolkiah , the eldest son of prince jefri bolkiah , former finance minister of brunei , the tiny oil - rich sultanate on the northern coast of the island of borneo , in eastern asia . i will save your time by not amplifying my extended royal family history , which has already been disseminated by the international media during the controversial dispute that erupted between my father and his step brother , the sultan of brunei sheik muda hassanal bolkiah . as you may know from the international media , the sultan had accused my father of financial mismanagement and impropriety of us $ 14 . 8 billion dollars . this was as a result of the asian financial crisis that made my father company amedeo development company and government owned brunei investment company to be declared bankrupt during his tenure in office . however my father was kept under house arrest , his bank accounts and private properties including a crude oil export refinery were later confiscated by the sultanate . furthermore , during this unfortunate period i was advised to evacuate my immediate family outside the sultanate to avoid further prosecution from the sultan and his security operatives , but before i could do that i was placed under house arrest by the sultan and i have no access to a phone but i have a palm v hand - held computer from which i am sending you this mail . before my incaceration , i went ahead to dispatch the sum of fifty eight million five hundred thousand united states dollars us $ 58 . 5 million in cash under special arrangement into the custody of a private security and trustee company for safe keeping abroad . hence i seek your good assistance to invest these funds into profitable investment in your country to facilitate future survival for my family abroad . i have decided to offer 10 % of these funds to you as compensation for your strong cooperation . 
please i count on your absolute confidentiality , transparency and trust while looking forward to your prompt reply towards a swift conclusion of this business transaction . i remain yours sincerely . prince fayad . w . bolkiah brunei darussalam .
## 873 Subject: best prescription generic meds 4 less . now your woman will be really happy with your intimate life ! our critics are our friends ; they show us our faults . my fellow astronauts . . . once they were men . now they are land crabs . beauty fades ; dumb is forever .
## 1863 Subject: holiday invitation please click on the attached link to launch your holiday party invitation . http : / / invitation . enron . com please direct any questions to dorie hitchcock via email .
## 2917 Subject: re : meeting re : wharton strategy jennifer , i am available for 30 minutes on fri , oct 30 . . a meeting at 8 : 30 would work better for me . vince jennifer burns 10 / 24 / 2000 04 : 14 pm to : michele nezi marvin / enron communications @ enron communications , mark palmer / corp / enron @ enron , cindy derecskey / corp / enron @ enron , vince j kaminski / hou / ect @ ect , shirley crenshaw / hou / ect @ ect , beth miertschin / hou / ect @ ect , christie patrick / hou / ect @ ect , kristin gandy / na / enron @ enron cc : subject : meeting re : wharton strategy lets try for friday , october 27 @ 9 : 00 am , please let me know if you are available . thanks ! - - - - - - - - - - - - - - - - - - - - - - forwarded by jennifer burns / hou / ect on 10 / 24 / 2000 04 : 07 pm - - - - - - - - - - - - - - - - - - - - - - - - - - - jennifer burns 10 / 23 / 2000 11 : 08 am to : michele nezi marvin / enron communications @ enron communications , sarah mulholland / hou / ect @ ect , mark palmer / corp / enron @ enron , kristin gandy / na / enron @ enron , beth miertschin / hou / ect @ ect , christie patrick / hou / ect @ ect , jeffrey a shankman / hou / ect @ ect , vince j kaminski / hou / ect @ ect cc : subject : meeting re : wharton strategy jeff shankman would like to have a meeting re : wharton strategy . please let me know if you would be available thursday , october 26 @ 3 : 00 . i will get back with everyone to confirm a location . thanks ! jennifer
## 4167 Subject: h - ib visa application chonawee : further to our telephone conversation this morning , i am attaching a visa questionnaire that i need you to complete and return to me immediately , together with the documents listed at the bottom of the form . as explained , i will send everything to our attorney ' s office in the hope that they can file for the h - ib prior to reaching the cap , but in the event this does not go through , your h - ib will not be available until october , 2000 . as your opt does not expire until november 1 , 2000 , we will still have the opportunity to get you an h - ib before your opt runs out . please bring these documents to me in eb 3694 . margaret daffin x 57843
## 5667 Subject: re : rollover of my vacation days to 2001 krishna , no problem . approved . vince pinnamaneni krishnarao 12 / 11 / 2000 06 : 28 pm to : vince j kaminski / hou / ect @ ect cc : subject : rollover of my vacation days to 2001 vince : i would like to rollover my vacation days for 2000 remaining at the end of this year to 2001 . i could not use us all of my available vacation this year because of the following reasons : 1 . as you know , i have been supporting three business units ( ees , epg & enron india ) this year . all these units had difficult and relatively long projects that required experience in energy markets , derivatives pricing and business knowledge that i had gained over the last few years at enron . 2 . there has been a significant change in the team members reporting to me . i now have six people under me compared to only three at the begin of the year . of the current six members , five joined us only this year and most of them didn ' t have any prior work experience , thus requiring a lot of my time in recruiting , training and mentoring . 3 . given that i had to visit our bombay office in january , 2000 for a business trip ( 10 days ) and will need to go there again in january , 2001 , i could not take leave from my work for the other two units ( ees & epg ) for an extended period of time . so , in summary , this year has been a long and challenging one , and as a result , i could not take vacation for more than a few days . i request you to grant the rollover of my remaining vacation to next year . currently i have 136 hours of vacation available and , of these , i expect to have 112 hours unused at the end of this year . thank you , krishna .
## spam
## 198 1
## 873 1
## 1863 0
## 2917 0
## 4167 0
## 5667 0
## text
## 5723 Subject: re : vacation vince : i just found out that it is friday , april 7 and not friday , march 31 st that i want to take for vacation . is this alright ? thanks ! shirley vince j kaminski 03 / 08 / 2000 06 : 18 pm to : shirley crenshaw / hou / ect @ ect cc : subject : re : vacation shirley , no problem . vince shirley crenshaw 03 / 08 / 2000 03 : 56 pm to : vince j kaminski / hou / ect @ ect cc : kevin g moore / hou / ect @ ect , william smith / corp / enron @ enron subject : vacation vince : i would like to take the following days as vacation : wednesday , march 15 th friday , march 31 st . please let me know if this is ok with you . thanks ! shirley
## 5724 Subject: re : research and development charges to gpg here it is ! - - - - - - - - - - - - - - - - - - - - - - forwarded by shirley crenshaw / hou / ect on 08 / 14 / 2000 07 : 47 am - - - - - - - - - - - - - - - - - - - - - - - - - - - vince j kaminski 08 / 10 / 2000 02 : 25 pm to : vera apodaca / et & s / enron @ enron cc : vince j kaminski / hou / ect @ ect , shirley crenshaw / hou / ect @ ect , pinnamaneni krishnarao / hou / ect @ ect subject : re : research and development charges to gpg vera , we shall talk to the accounting group about the correction . vince 08 / 09 / 2000 03 : 26 pm vera apodaca @ enron vera apodaca @ enron vera apodaca @ enron 08 / 09 / 2000 03 : 26 pm 08 / 09 / 2000 03 : 26 pm to : pinnamaneni krishnarao / hou / ect @ ect cc : vince j kaminski / hou / ect @ ect subject : research and development charges to gpg per mail dated june 15 from kim watson , there was supposed to have occurred a true - up of $ 274 . 7 in july for the fist six months of 2000 . reviewing july actuals , i was not able to locate this entry . would you pls let me know whether this entry was made , if not , when do you intend to process it . thanks .
## 5725 Subject: re : receipts from visit jim , thanks again for the invitation to visit lsu . shirley will fedex the receipts tomorrow . vince " james r . garven " on 02 / 08 / 2000 07 : 00 : 50 pm to : vince j kaminski cc : subject : receipts from visit dear vince , thanks again for taking the time to visit . ? both faculty and students got a lot out of your presentations . i have a favor to ask concerning the expense reimbursement process . ? can you mail all travel and lodging receipts to my secretary joan payne at the following address : joan payne department of finance 2163 ceba louisiana state university baton rouge , la ? 70803 thanks , jim garven james r . garven william h . wright , jr . endowed chair for financial services department of finance 2158 ceba e . j . ourso college of business administration louisiana state university baton rouge , la ? 70803 - 6308 voice ( 225 ) 388 - 0477 ? | ? fax : ( 800 ) 859 - 6361 e - mail : ? jgarven @ lsu . edu home page : http : / / garven . lsu . edu vita : http : / / garven . lsu . edu / dossier . html research paper archive : http : / / garven . lsu . edu / research . html
## 5726 Subject: re : enron case study update wow ! all on the same day . that ' s super . thank you so very much . vince is coming up to baylor on monday of next week and we will hash out our question list then . thanks john at 04 : 54 pm 11 / 6 / 00 - 0600 , you wrote : > good afternoon john , > > i just want to drop you a line to update you re : andy fastow . i have > confirmed a one hour interview slot with mr . fastow in monday , december 4 th > from > 11 : 00 a . m . - noon . this is in addition to your schedule interviews with > mr . lay and mr . skilling - outline below . > > if you have any questions , please do not hesitate to contact me at > 713 - 853 - 5670 . > > regards , > > cindy > > > - - - - - forwarded by cindy derecskey / corp / enron on 11 / 06 / 2000 04 : 49 pm - - - - - > > cindy > derecskey to : " john martin " > cc : vince j kaminski / hou / ect @ ect , christie patrick / hou / ect @ ect > 10 / 31 / 2000 subject : re : enron case study ( document link : cindy derecskey ) > 01 : 44 pm > > > > > > good afternoon john , > > i hope things are well with you . i am writing to update you on the status > of your meetings with andy fastow , ken lay and jeff skilling . i have > arranged the following meeting dates and times with ken lay and jeff > skilling , ( i am still trying to work with andy fastow ' s schedule ) : > > jeff skilling > december 4 th > 2 : 00 - 3 : 00 p . m . > > ken lay > december 4 th > 3 : 30 - 4 : 30 p . m . > > also , i will attempt to schedule the meeting with andy fastow for december > 4 th for convenience - this will also allow us to possibly schedule > additional meetings for the 5 th ( as needed ) . i will let you know as soon > as i ' m successful . > > regards , > > cindy derecskey > university affairs > enron corp . > > > > > john d . martin carr p . collins chair in finance finance department baylor university po box 98004 waco , tx 76798 254 - 710 - 4473 ( office ) 254 - 710 - 1092 ( fax ) j _ martin @ baylor . 
edu web : http : / / hsb . baylor . edu / html / martinj / home . html
## 5727 Subject: re : interest david , please , call shirley crenshaw ( my assistant ) , extension 5 - 5290 to set it up . vince david p dupre 06 / 15 / 2000 05 : 18 pm to : vince j kaminski / hou / ect @ ect cc : subject : re : interest what time ( s ) are you available over the next few days ? thanks david 3 - 3528 vince j kaminski 06 / 15 / 2000 05 : 16 pm to : david p dupre / hou / ect @ ect cc : vince j kaminski / hou / ect @ ect subject : re : interest david , please , stop by to chat about it for a few minutes . vince david p dupre 06 / 15 / 2000 11 : 57 am to : vince j kaminski / hou / ect @ ect cc : subject : re : interest may we meet to discuss my interest in joining your group ? i have a strong quantitative discipline and am highly numerate . thanks david 3 - 3528 - - - - - - - - - - - - - - - - - - - - - - forwarded by david p dupre / hou / ect on 06 / 15 / 2000 11 : 53 am - - - - - - - - - - - - - - - - - - - - - - - - - - - to : david p dupre / hou / ect @ ect cc : subject : re : interest vince kaminski
## 5728 Subject: news : aurora 5 . 2 update aurora version 5 . 2 - the fastest model just got faster - epis announces the release of aurora , version 5 . 2 aurora the electric market price forecasting tool is already legendary for power and speed . we ' ve combined a powerful chronological dispatch model with the capability to simulate the market from 1 day to 25 + years . add to that a risk analysis section , powered by user selectable monte carlo & / or latin hypercube modeling , enough portfolio analysis power to please the toughest critic , & inputs and outputs from standard excel & access tables and you ' ve got one of most powerful tools in the market . just a few months ago we expanded our emissions modeling capabilities , added our quarterly database update , increased the speed of the entire model , and made but that wasn ' t enough . we ' ve done it again . some of the operations that we ' ve included . . . two new reporting enhancements . the first is marginal reporting for fuels , resources and groups of resources . the second is the ability to display resource stack information in graphical and dispatch order form . other enhancements include dual fuel modeling , improved transmission modeling , greater access to hourly results , and the ability to model monthly emission rates . moreover , the databases for central and eastern , texas , and western markets have been updated to use the new modeling capabilities . we continue to make aurora easier to use . this version enhances user control over modeling , editing inputs , and viewing of aurora output . clients desiring to exploit the power of aurora now have greater control over the inputs and outputs through vb scripting in aurora . the new " update data " capability provides a means to universally change any data element . attached is more information on the fastest and most flexible tool of its kind . for additional information , please visit our website ( www . epis . 
com ) or contact our sales department at ( 503 ) 722 - 2023 . ask about our special 7 - day demo ! v . todd wheeler sales manager epis , inc . ( 503 ) 722 - 2023 tel . ( 503 ) 722 - 7130 fax www . epis . com todd @ epis . com > > - what ' s new - version 5 . 2 information . doc - technical information aurora v 5 - 2 . doc
## spam
## 5723 0
## 5724 0
## 5725 0
## 5726 0
## 5727 0
## 5728 0
## 'data.frame': 5728 obs. of 2 variables:
## $ text: chr "Subject: naturally irresistible your corporate identity lt is really hard to recollect a company : the market is full of suqg"| __truncated__ "Subject: the stock trading gunslinger fanny is merrill but muzo not colza attainder and penultimate like esmark perspicuous ra"| __truncated__ "Subject: unbelievable new homes made easy im wanting to show you this homeowner you have been pre - approved for a $ 454 , 1"| __truncated__ "Subject: 4 color printing special request additional information now ! click here click here for a printable version of our o"| __truncated__ ...
## $ spam: int 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "comment")= chr "glb_entity_df"
## NULL
# Build the training set (glb_trnent_df) and the new-data set (glb_newent_df)
# from the imported glb_entity_df, driven by glb_is_separate_newent_dataset,
# glb_split_entity_newent_datasets and glb_split_newdata_method.
#
# The training frame starts as a copy of the import in every configuration.
# When the new data is imported separately, the import above *is* the training
# data (it was given the comment "glb_trnent_df"), and glb_trnent_df is needed
# by the rbind() below.
glb_trnent_df <- glb_entity_df
comment(glb_trnent_df) <- "glb_trnent_df"

if (glb_is_separate_newent_dataset) {
  glb_newent_df <- myimport_data(
    url=glb_newdt_url,
    comment="glb_newent_df", force_header=TRUE)

  # To make plots / stats / checks easier in chunk:inspectORexplore.data
  glb_entity_df <- rbind(glb_trnent_df, glb_newent_df)
  comment(glb_entity_df) <- "glb_entity_df"
} else {
  if (!glb_split_entity_newent_datasets) {
    stop("Not implemented yet")
    # Draft of the intended behaviour, kept for reference only:
    # glb_newent_df <- glb_trnent_df[sample(1:nrow(glb_trnent_df),
    #                                       max(2, nrow(glb_trnent_df) / 1000)),]
  } else if (glb_split_newdata_method == "condition") {
    # NOTE(review): the condition string is parsed and evaluated as R code by
    # subset(); it must only ever come from this trusted configuration.
    glb_newent_df <- do.call("subset",
      list(glb_trnent_df, parse(text=glb_split_newdata_condition)))
    glb_trnent_df <- do.call("subset",
      list(glb_trnent_df, parse(text=paste0("!(",
                                            glb_split_newdata_condition,
                                            ")"))))
  } else if (glb_split_newdata_method == "sample") {
    # Fail with a clear message if caTools is unavailable, rather than with a
    # missing-function error at sample.split() below
    if (!require(caTools))
      stop("package 'caTools' is required for glb_split_newdata_method 'sample'")
    # The ratio is documented as > 0 & < 1
    stopifnot(glb_split_newdata_size_ratio > 0,
              glb_split_newdata_size_ratio < 1)
    set.seed(glb_split_sample.seed)
    # TRUE marks rows kept for training; sampling is done on the raw response
    split <- sample.split(glb_trnent_df[, glb_rsp_var_raw],
                          SplitRatio=(1-glb_split_newdata_size_ratio))
    glb_newent_df <- glb_trnent_df[!split, ]
    glb_trnent_df <- glb_trnent_df[split ,]
  } else if (glb_split_newdata_method == "copy") {
    # Training and new data are both full copies of the import
    glb_trnent_df <- glb_entity_df
    comment(glb_trnent_df) <- "glb_trnent_df"
    glb_newent_df <- glb_entity_df
    comment(glb_newent_df) <- "glb_newent_df"
  } else stop("glb_split_newdata_method should be %in% c('condition', 'sample', 'copy')")

  comment(glb_newent_df) <- "glb_newent_df"
  myprint_df(glb_newent_df)
  str(glb_newent_df)

  if (glb_split_entity_newent_datasets) {
    myprint_df(glb_trnent_df)
    str(glb_trnent_df)
  }
}
## Loading required package: caTools
## text
## 2 Subject: the stock trading gunslinger fanny is merrill but muzo not colza attainder and penultimate like esmark perspicuous ramble is segovia not group try slung kansas tanzania yes chameleon or continuant clothesman no libretto is chesapeake but tight not waterway herald and hawthorn like chisel morristown superior is deoxyribonucleic not clockwork try hall incredible mcdougall yes hepburn or einsteinian earmark no sapling is boar but duane not plain palfrey and inflexible like huzzah pepperoni bedtime is nameable not attire try edt chronography optima yes pirogue or diffusion albeit no
## 4 Subject: 4 color printing special request additional information now ! click here click here for a printable version of our order form ( pdf format ) phone : ( 626 ) 338 - 8090 fax : ( 626 ) 338 - 8102 e - mail : ramsey @ goldengraphix . com request additional information now ! click here click here for a printable version of our order form ( pdf format ) golden graphix & printing 5110 azusa canyon rd . irwindale , ca 91706 this e - mail message is an advertisement and / or solicitation .
## 5 Subject: do not have money , get software cds from here ! software compatibility . . . . ain ' t it great ? grow old along with me the best is yet to be . all tradgedies are finish ' d by death . all comedies are ended by marriage .
## 8 Subject: save your money buy getting this thing here you have not tried cialls yet ? than you cannot even imagine what it is like to be a real man in bed ! the thing is that a great errrectlon is provided for you exactiy when you want . cialis has a lot of advantages over viagra - the effect iasts 36 hours ! - you are ready to start within just 10 minutes ! - you can mix it with aicohol ! we ship to any country ! get it right now ! .
## 11 Subject: las vegas high rise boom las vegas is fast becoming a major metropolitan city ! 60 + new high rise towers are expected to be built on and around the las vegas strip within the next 3 - 4 years , that ' s 30 , 000 + condominiums ! this boom has just begun ! buy first . . . early phase , pre - construction pricing is now available on las vegas high rises including trump , cosmopolitan , mgm , turnberry , icon , sky , among others . join the interest list : http : / / www . verticallv . com message has been sent to you by realty one highrise . learn more at www . verticallv . comif you wish to be excluded from future mailings , please reply with the word remove in the subject line .
## 16 Subject: search engine position be the very first listing in the top search engines immediately . our company will now place any business with a qualified website permanently at the top of the major search engines guaranteed never to move ( ex : yahoo ! , msn , alta vista , etc . ) . this promotion includes unlimited traffic and is not going to last long . if you are interested in being guaranteed first position in the top search engines at a promotional fee , please contact us promptly to find out if you qualify via email at searchl 1 @ telefonica . net . pe it ' s very important to include the url ( s ) if you are interested in promoting ! ! ! this is not pay per click . examples will be provided . this promotion is only valid in the usa and canada . sincerely , the search engine placement specialists if you wish to be removed from this list , please respond to the following email address and type the word " remove " in your subject line : search 6 @ speedy . com . pe
## spam
## 2 1
## 4 1
## 5 1
## 8 1
## 11 1
## 16 1
## text
## 1831 Subject: update vince , a quick update on job candidates : 1 ) nelson neale : relayed your request to norman , and told nelson that an offer is in progress . did not mention specific numbers to him . 2 ) charles shen : left a message for him that we would get back to him next week with details of an offer . 3 ) interviewed by phone tom barkley at thunderbird ( brought to our attention by enron recruiters there ) . he looks very interesting so i am trying to schedule a visit to enron for him . he will finish t - bird in december ( mba ) and has a bachelors with honours in mathematics . have a great weekend . stinson
## 2663 Subject: confirmation of meeting vince : thanks for introducing me as a speaker at the power 2000 conference . as per our conversation , please find enclosed my resume . i will come to your office at 1 : 30 pm , friday , may 12 , 2000 . please let me know if the dress code is casual or formal . thanks again for taking the time to talk to me regarding opportunities at enron . > sanjeev k khanna , m . sc . , p . eng . director , quantitative risk management pg & e energy trading 1100 louisina street , # 1000 houston , tx 77094 email : sanjeev . khanna @ et . pge . com tel : ( 713 ) 371 6647 , pager 800 - 526 - 4095 , cell ( 281 ) 302 - 8468 pg & e energy trading and any other company referenced herein which uses the pg & e name or logo are not the same company as pacific gas and electric company , the california utility . these companies are not regulated by the california public utilities commission , and customers do not have to buy products from these companies in order to continue to receive quality regulated services from the utility . - . doc
## 2683 Subject: your lap top vince : the it migration team called and said that they need an email from you stating that you do not want your lap top " ghost upped " ? ( not sure if this is the correct term ) . they said they were supposed to do this to all computers during the migration process , but since you requested that they not do this , then they need an email from you with this request . you need to send the email to : kacee downey / enron @ enronxgate thanks ! shirley
## 3348 Subject: re : your mail dear vince , the following message is from co - pi , prof . baichun xiao in long island university . in this message , he told me how enron would be registered . to my best knowledge , ibm , lucent and other big companies have registered in nsf for long . your kindly understanding is acknowledged very much . good weekend , youyi - - - - - - - - - - - - - - - - - - - - - - forwarded by youyi feng / na / enron on 09 / 08 / 2000 03 : 08 pm - - - - - - - - - - - - - - - - - - - - - - - - - - - baichun xiao on 09 / 07 / 2000 09 : 39 : 38 am to : youyi . feng @ enron . com cc : subject : re : your mail dear youyi : the person in charge of external grants in your company needs to contact nsf by calling the following numbers for institution registration . fastlane user support : 1 - 800 - 673 - 6188 . fastlane availability ( recording ) : 1 - 800 - 437 - 7408 . after enron is registered ( i think it ' s a free registration ) , you provide the following information to the enron official so he / she will send it to nsf . nsf will assign you a password for future access to electronic submission ( called fastlane ) . since all proposals have to be submitted through fastlane after oct . 1 , 2000 , this is the must . name highest degree year conferred present institution department street address city state zip code social security number ( ssn ) email address business phone number business fax number for more information , you may go to www . fastlane . nsf . gov / fastlane . htm baichun at 05 : 14 pm 9 / 6 / 00 - 0500 , you wrote : > > dear baichun , > > i am having no idea about contacting nsf while the > managing director of this research group has kindly agreed > on doing anything he can to help us pursue fund rising . > please let me know how enron can put my profile into nsf ' s > database officially . > > the first four pages of the project application have been revised by > me . i do not > really know if you like the revision . 
appended is the primary > and description of the project document files . > > best regards , > > > youyi > > ( see attached file : project summary . doc ) ( see attached file : project > description . doc ) > attachment converted : " c : \\ bcx \\ res \\ eudora \\ attach \\ project summary . doc " > > attachment converted : " c : \\ bcx \\ res \\ eudora \\ attach \\ project description . doc " >
## 4284 Subject: hc costless collar andrea , we finished the the costless collar valuation . see the spreadsheets for details . we checked the bloomberg for volatility assumption . the 100 days volatility is around 50 % . since the option is for 3 years , the volatility should be somewhat smaller . so we put an array of calculations , with the volatility ranges from 20 % to 60 % . if you have any questions , please call me or bob lee . zimin - - - - - - - - - - - - - - - - - - - - - - forwarded by zimin lu / hou / ect on 01 / 09 / 2001 09 : 02 am - - - - - - - - - - - - - - - - - - - - - - - - - - - bob lee @ enron 01 / 09 / 2001 08 : 48 am to : zimin lu / hou / ect @ ect cc : subject : hc costless collar
## 4935 Subject: thanks hi keith ! thanks so much for your additional thoughts , which i will definitely pass on to our business units . as for your nephew , please have him send his resume to me . i ' ll be happy to see what i can do . again , on behalf of enron and everyone involved in the project , especially vince and i and ken parkhill , we had a great and informative experience with the entire tiger project . best regards ! - - christie . - - - - - - - - - - - - - - - - - - - - - - forwarded by christie patrick / hou / ect on 04 / 09 / 2001 07 : 15 am - - - - - - - - - - - - - - - - - - - - - - - - - - - weigelt on 04 / 06 / 2001 04 : 40 : 10 pm to : " ' christie . patrick @ enron . com ' " cc : subject : thanks christie ; i wanted to send a short note to express my thanks to enron , and give some additional thoughts about the enron presentation . both students and staff enjoyed our interaction with enron . we hope enron got as much from the project as we did . i also had some additional thoughts about enrononline . - during the presentation , the students referred to a market ' s " critical mass " . however , we never defined what we meant by that . we had several discussions on this topic . i think the best way for enron to think about this is in terms of the psychology of traders . critical mass gives a market liquidity . traders seem to think about liquidity in terms of breadth ( how many products are being traded ) and depth ( the number of bids and offers ) . our thoughts were that when a trader pulls up a screen , he ( or she ) wants to see bids , offers , and trades occurring . so a simple ( and cheap ) metric that enron could use to determine whether rivals are approaching critical mass is simply to pull up screens and see if trades are occurring , and how quickly . traders will generally not use a trading platform that lacks " action " . 
- in terms of new power , i feel it is extremely important that your managers create an identity for the company . luckily no rival has done this ( though green energy is closer than most ) . the company that can communicate a simple and powerful message will create a competitive advantage for itself . i believe you can differentiate yourself in this market ( though you are selling a commodity ) . since you are essentially selling service , differentiation is always possible . on a final note , i wanted to ask a favor . my nephew recently graduate from michigan state university with a degree in business . i brought him to wharton to work on the tiger projects . this was because i knew he could add value , and because i thought he needed more experience in some strategy areas . he served as project coordinator and worked on several projects ( including enron ) . i was hoping i could get him an interview at enron since he expressed interest in your company . any help you could offer is appreciated . thanks again for making the experience memorable to the students . keith
## spam
## 1831 0
## 2663 0
## 2683 0
## 3348 0
## 4284 0
## 4935 0
## text
## 5709 Subject: re : hi vince hi jeff , no problem . as soon as we have an agreement in place , i shall start working with you . vince " $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ " on 01 / 10 / 2001 12 : 59 : 36 pm please respond to " $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ " to : vince . j . kaminski @ enron . com cc : subject : hi vince hi vince . here is my contact info : jeff wesley 949 813 2241 jjw @ ziplip . com jjw @ lokmail . net i work with mri usa and robertwalters uk i also underwrite my own deals . example of current accounts : painewebber merrill lynch crowell weedon current companies i work with include : conedison energy smud ( sacramento energy ) calpine energy morgan stanley dean whitter job orders sent : chicago mercantile exchange alcoa cournerstone energy i hope this makes it through the firewall ! my partner in london at robertwalters sent me a guy for you to browse . . . i ' ll pop him over later . thanks vince ! jeff * get free , secure online email at http : / / www . ziplip . com / *
## 5711 Subject: raptors here is the most recent version of the spreadsheet and the accompanying assumptions .
## 5714 Subject: promotion vince , i want to congratulate you on your promotion to managing director ! as i scanned the list of people who were promoted , i was so pleased to see your name on the list . as large as enron is , it is refreshing to see people like you with incredible skill and talent receive deserving promotions . i have certainly enjoyed working with you and the r & d team over the past year and look forward to a successful 2000 as we break new ground for et & s . kim .
## 5715 Subject: re : petronas benchmarking visit fyi the list of the delegates from petronas . vince kaminski - - - - - - - - - - - - - - - - - - - - - - forwarded by vince j kaminski / hou / ect on 01 / 23 / 2001 01 : 21 pm - - - - - - - - - - - - - - - - - - - - - - - - - - - khairuddinbmjaafar @ petronas . com . my on 01 / 22 / 2001 03 : 13 : 35 am please respond to khairuddin _ mjaafar @ petronas . com . my to : vince . j . kaminski @ enron . com cc : subject : re : petronas benchmarking visit vince , here is the list of our delegates for your kind perusal : 1 . iqbal abdullah ( general manager ) , 2 . nur azmin abu bakar ( head , risk assessment & controls ) , 3 . zulkifli a rahim ( head , risk , measurement & systems ) 4 . adnan adams ( head , special projects ) . thanks . regards , khairuddin
## 5719 Subject: altos na gas model kim , i know you have been interested in the development of a long term natural gas model for use in the ets revenue management effort . i agree that the ability to model various supply / demand scenarios for north america should prove useful in ets business development efforts , and in particular , in predicting the impact of the new , large alaskan gas supplies on the n . a . pipeline grid . as you know , vince kaminski feels that a software product developed by dale nesbitt ( marketpoint , inc . ) of los altos hills , ca . may be a good choice , and one readily available in a relatively short time . marketpoint has proposed that they work with us in our offices for a week of intensive training and testing , so we can make a more informed decision before ordering the software . they have proposed a charge of $ 12 , 000 , plus expenses , for the one - week session , with that cost applied to the first year ' s subscription price ( approx . $ 55 - 60 m / yr ) should we decide to go forward . the other details ( timing , ene resources required , necessary it interface , etc ) still need to be worked out , but i believe there is generally a consensus to proceed with the test . with my recent relocation to the clean fuels group , i will no longer be responsible for the alaskan pipeline development . however , danny mccarty is pulling together a team consisting of people from nng , nbp and elsewhere in ets to push the project forward . i believe eric gadd will be playing a key role as well . the decision on the long term gas model should appropriately be made by the new project team . i have attached a draft of the proposed marketpoint license agreement . it has not yet been fully reviewed by legal ( or by shelley c . for the " affiliate " issues ) . let me know if i can provide any other background or assistance . jng
## 5727 Subject: re : interest david , please , call shirley crenshaw ( my assistant ) , extension 5 - 5290 to set it up . vince david p dupre 06 / 15 / 2000 05 : 18 pm to : vince j kaminski / hou / ect @ ect cc : subject : re : interest what time ( s ) are you available over the next few days ? thanks david 3 - 3528 vince j kaminski 06 / 15 / 2000 05 : 16 pm to : david p dupre / hou / ect @ ect cc : vince j kaminski / hou / ect @ ect subject : re : interest david , please , stop by to chat about it for a few minutes . vince david p dupre 06 / 15 / 2000 11 : 57 am to : vince j kaminski / hou / ect @ ect cc : subject : re : interest may we meet to discuss my interest in joining your group ? i have a strong quantitative discipline and am highly numerate . thanks david 3 - 3528 - - - - - - - - - - - - - - - - - - - - - - forwarded by david p dupre / hou / ect on 06 / 15 / 2000 11 : 53 am - - - - - - - - - - - - - - - - - - - - - - - - - - - to : david p dupre / hou / ect @ ect cc : subject : re : interest vince kaminski
## spam
## 5709 0
## 5711 0
## 5714 0
## 5715 0
## 5719 0
## 5727 0
## 'data.frame': 1718 obs. of 2 variables:
## $ text: chr "Subject: the stock trading gunslinger fanny is merrill but muzo not colza attainder and penultimate like esmark perspicuous ra"| __truncated__ "Subject: 4 color printing special request additional information now ! click here click here for a printable version of our o"| __truncated__ "Subject: do not have money , get software cds from here ! software compatibility . . . . ain ' t it great ? grow old along wi"| __truncated__ "Subject: save your money buy getting this thing here you have not tried cialls yet ? than you cannot even imagine what it is "| __truncated__ ...
## $ spam: int 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "comment")= chr "glb_newent_df"
## text
## 1 Subject: naturally irresistible your corporate identity lt is really hard to recollect a company : the market is full of suqgestions and the information isoverwhelminq ; but a good catchy logo , stylish statlonery and outstanding website will make the task much easier . we do not promise that havinq ordered a iogo your company will automaticaily become a world ieader : it isguite ciear that without good products , effective business organization and practicable aim it will be hotat nowadays market ; but we do promise that your marketing efforts will become much more effective . here is the list of clear benefits : creativeness : hand - made , original logos , specially done to reflect your distinctive company image . convenience : logo and stationery are provided in all formats ; easy - to - use content management system letsyou change your website content and even its structure . promptness : you will see logo drafts within three business days . affordability : your marketing break - through shouldn ' t make gaps in your budget . 100 % satisfaction guaranteed : we provide unlimited amount of changes with no extra fees for you to be surethat you will love the result of this collaboration . have a look at our portfolio _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ not interested . . . _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
## 3 Subject: unbelievable new homes made easy im wanting to show you this homeowner you have been pre - approved for a $ 454 , 169 home loan at a 3 . 72 fixed rate . this offer is being extended to you unconditionally and your credit is in no way a factor . to take advantage of this limited time opportunity all we ask is that you visit our website and complete the 1 minute post approval form look foward to hearing from you , dorcas pittman
## 6 Subject: great nnews hello , welcome to medzonline sh groundsel op we are pleased to introduce ourselves as one of the ieading online phar felicitation maceuticai shops . helter v shakedown r a cosmopolitan l l blister l l bestow ag ac tosher l is coadjutor va confidant um andmanyother . - sav inexpiable e over 75 % - total confide leisure ntiaiity - worldwide s polite hlpplng - ov allusion er 5 miilion customers in 150 countries have devitalize a nice day !
## 7 Subject: here ' s a hot play in motion homeland security investments the terror attacks on the united states on september 11 , 20 ol have changed the security landscape for the foreseeable future . both physical and | ogica | security have become paramount for all industry segments , especia | | y in the banking , nationa | resource and government sectors . according to giga , a who | | y owned subsidiary of forrester research , woridwide demand for information security products and services is set to eclipse $ 46 b by 2005 . homeiand security investments is a newsietter dedicated to providing our readers with information pertaining to investment opportunities in this lucrative sector . as we know , events related to homeland security happen with lightning speed . what we as investors can do is position ourselves in such a way as to take advantage of the current trends and be ready to capitalize on events which have yet to happen . homeland security investments is here to heip our readers do just that . with this in mind , it is with great excitement that we present vinoble , inc . this stock is expected to do big things in both the near and | ong terms . symbol : vnbl . ob current price : o . 08 short term target price : o . 35 12 month target price : 1 . 20 * * * why we believe vnbl . ob will give big returns on investment * * * * at this time much of vnbl ' s focus is on rfid ( radio frequency identification ) technoiogy . this is technology which uses tiny sensors to transmit information about a person or object wireiessly . * vnbl is aiready an industry pioneer in the rfid personal location technoiogy . * vnbl is developing a form of rfid technology which allows companies and governments to wirelessly track their assets and resources . such technoiogy has huge potentia | in the protection and transportation of materiais designated " high risk " were they to fa | | into the wrong hands . 
* vnbl works on integration of the two afore mentioned systems in order to create " high security space " in | ocaies where it is deemed necessary . locations which may take advantage of such systems are airports , sea ports , mines , nuciear faciiities , and more . * as with a | | stocks , news drives the short term price . fresh news has made vnbl a hot buy . news on vnbl malibu , calif . - - ( business wire ) - - june 16 , 2 oo 5 - - vinoble , inc . ( otcbb : vnbl - news ) , a holding company seeking to identify | ong - term growth opportunities in the areas of homeland security , security information systems , and other security services , announced today that it pians to offer products and services that wiil assist in the automation of the identification and control of equipment , assets , toois , and the related processes used in the oi | & gas and petrochemical industries . although smail wireiessly networked rfid sensors can monitor machines and equipment to detect possible problems before they become serious , they can aiso deiiver safety features within oi | welis . oi | maybe trapped in different | ayers of rock , aiong with gas and water . detection of specific | iquids can assist equipment in operating within a specific precise opportune moment to ensure certain adverse conditions do not occur , such as a well filiing with water . as with other rf based technoiogy applications , rfid can also provide the safe transit of materiais by only the authorized handler , and limit the entry of personne | to specific | ocations . ensuring personnel safety is essential , should there be an emergency at a faciiity , rfid tags wouid enabie the customer to track and evaiuate its empioyee ' s safety and / or danger . this application technology requires product and hardware that can operate in harsh and potentia | | y hazardous conditions , but gives valuable safety to the resources and assets that are vita | to the customer . 
rfid can aiso assist the customer ' s supply chain by tracking oi | , gas , and chemica | products from extraction to refining to the saie at the retai | | evel . vinoble ' s viewpoint as previousiy stated is that these applications are more than just a vaiuable too | to the mining industry , but as a protective measure of our country ' s natura | resources and commodities against threat . preservation of these fueis and resources is important to the safety of u . s . industry and economy . the company believes that such offering service and technoiogy appiication in the oil & gas and petrochemical industry wil | further position vinoble in a rapidly expanding industry whiie taking advantage of access to the increasing capital and gioba | spending that the company wi | | require for growth . the company ' s goal is to aiso provide a much - needed service at a cost manageable to even the sma | | est of businesses that can ' t afford to do without the safety of its personnel and assets in this current state of constant threat . this is outstanding news . the growth potential for this company is exceptional . in an already hot industry , vnbl . ob stands out as a truiy innovative pioneer . we see big things happening to this stock . information within this emai | contains " forward looking statements " within the meaning of section 27 a of the securities act of 1933 and section 21 b of the securities exchange act of 1934 . any statements that express or involve discussions with respect to predictions , expectations , beliefs , pians , projections , objectives , goals , assumptions or future events or performance are not statements of historica | fact and may be " forward | ooking statements . " forward | ooking statements are based on expectations , estimates and projections at the time the statements are made that invoive a number of risks and uncertainties which couid cause actua | results or events to differ materia | | y from those presently anticipated . 
forward looking statements in this action may be identified through the use of words such as " projects " , " foresee " , " expects " , " wi | | , " " anticipates , " " estimates , " " beiieves , " " understands " or that by statements indicating certain actions " may , " " couid , " or " might " occur . as with many micro - cap stocks , today ' s company has additional risk factors worth noting . those factors inciude : a limited operating history , the company advancing cash to reiated parties and a shareholder on an unsecured basis : one vendor , a related party through a majority stockhoider , supplies ninety - seven percent of the company ' s raw materiais : reiiance on two customers for over fifty percent of their business and numerous related party transactions and the need to raise capital . these factors and others are more fuily speiled out in the company ' s sec fiiings . we urge you to read the filings before you invest . the rocket stock report does not represent that the information contained in this message states ail materia | facts or does not omit a material fact necessary to make the statements therein not misleading . ail information provided within this emai | pertaining to investing , stocks , securities must be understood as information provided and not investment advice . the rocket stock report advises all readers and subscribers to seek advice from a registered professiona | securities representative before deciding to trade in stocks featured within this email . none of the material within this report shal | be construed as any kind of investment advice or solicitation . many of these companies are on the verge of bankruptcy . you can lose ail your money by investing in this stock . the publisher of the rocket stock report is not a registered investment advisor . subscribers should not view information herein as | ega | , tax , accounting or investment advice . 
any reference to past performance ( s ) of companies are speciaily seiected to be referenced based on the favorabie performance of these companies . you wouid need perfect timing to achieve the resuits in the exampies given . there can be no assurance of that happening . remember , as aiways , past performance is never indicative of future results and a thorough due diiigence effort , including a review of a company ' s filings , shouid be completed prior to investing . in compiiance with the securities act of 1933 , section 17 ( b ) , the rocket stock report discioses the receipt of tweive thousand doilars from a third party ( gem , inc . ) , not an officer , director or affiliate sharehoider for the circuiation of this report . gem , inc . has a position in the stock they wil | se | | at any time without notice . be aware of an inherent confiict of interest resuiting from such compensation due to the fact that this is a paid advertisement and we are conflicted . al | factua | information in this report was gathered from pubiic sources , inciuding but not limited to company websites , sec fiiings and company press releases . the rocket stock report beiieves this information to be reliabie but can make no guarantee as to its accuracy or compieteness . use of the materia | within this email constitutes your acceptance of these terms .
## 9 Subject: undeliverable : home based business for grownups your message subject : home based business for grownups sent : sun , 21 jan 2001 09 : 24 : 27 + 0100 did not reach the following recipient ( s ) : 75 @ tfi . kpn . com on mon , 25 feb 2002 13 : 32 : 23 + 0100 the recipient name is not recognized the mts - id of the original message is : c = us ; a = ; p = ptt telecom ; l = mtpi 70590202251232 fjt 4 d 8 q 5 msexch : ims : kpn - telecom : i : mtpi 7059 0 ( 000 co 5 a 6 ) unknown recipient
## 10 Subject: save your money buy getting this thing here you have not tried cialls yet ? than you cannot even imagine what it is like to be a real man in bed ! the thing is that a great errrectlon is provided for you exactiy when you want . cialis has a lot of advantages over viagra - the effect lasts 36 hours ! - you are ready to start within just 10 minutes ! - you can mix it with aicohoi ! we ship to any country ! get it right now ! .
## spam
## 1 1
## 3 1
## 6 1
## 7 1
## 9 1
## 10 1
## text
## 773 Subject: * * message you sent blocked by our bulk email filter * * your message to : ponddr @ nalu . net was blocked by our spam firewall . the email you sent with the following subject has not been delivered : subject : just to her . . .
## 1449 Subject: computer recently a new person moved into our space on 32 nd floor . being that the gentleman is there , i decided to move our computer out of his working space . the computer will be moved to desk in our area until the new hire arrives . fyi thanks kevin moore
## 3065 Subject: re : global risk management operations rick , i read your memo regarding global risk management initiative . i am sending you the information regarding a related initiative on which i have been working last year and which is moving now into the implementation stage . it ' s enterprise - wide risk management and it ' s really an effort to measure business risks consistently across the company . i hope my group can be helpful in designing the general approach to this problem . please , let me know what your thoughts are . vince enron north america corp . from : rick causey @ enron 01 / 17 / 2000 06 : 04 pm sent by : enron announcements @ enron to : all enron worldwide cc : subject : global risk management operations recognizing enron \001 , s increasing worldwide presence in the wholesale energy business and the need to insure outstanding internal controls for all of our risk management activities , regardless of location , a global risk management operations function has been created under the direction of sally w . beck , vice president . in this role , sally will report to rick causey , executive vice president and chief accounting officer . sally \001 , s responsibilities with regard to global risk management operations will mirror those of other recently created enron global functions . in this role , sally will work closely with all enron geographic regions and wholesale companies to insure that each entity receives individualized regional support while also focusing on the following global responsibilities : 1 . enhance communication among risk management operations professionals . 2 . assure the proliferation of best operational practices around the globe . 3 . facilitate the allocation of human resources . 4 . provide training for risk management operations personnel . 5 . coordinate user requirements for shared operational systems . 6 . oversee the creation of a global internal control audit plan for risk management activities . 7 . 
establish procedures for opening new risk management operations offices and create key benchmarks for measuring on - going risk controls . each regional operations team will continue its direct reporting relationship within its business unit , and will collaborate with sally in the delivery of these critical items . the houston - based risk management operations team under sue frusco \001 , s leadership , which currently supports risk management activities for south america and australia , will also report directly to sally . sally retains her role as vice president of energy operations for enron north america , reporting to the ena office of the chairman . she has been in her current role over energy operations since 1997 , where she manages risk consolidation and reporting , risk management administration , physical product delivery , confirmations and cash management for ena \001 , s physical commodity trading , energy derivatives trading and financial products trading . sally has been with enron since 1992 , when she joined the company as a manager in global credit . prior to joining enron , sally had four years experience as a commercial banker and spent seven years as a registered securities principal with a regional investment banking firm . she also owned and managed a retail business for several years . please join me in supporting sally in this additional coordination role for global risk management operations .
## 3315 Subject: re : dr . bernard loyd at mckinsey ( agriculture ) hi , thanks for the document on your practice . i forwarded it to the relevant parties . they will take a look at it and get back to me when they wish to take the next steps with you and your colleagues at mckinsey . regards , iris - - - - - original message - - - - - from : @ enron sent : wednesday , march 07 , 2001 11 : 28 am to : mack , iris subject : re : dr . bernard loyd at mckinsey ( agriculture ) below is some material on the practice . b to : bernard _ loyd @ mckinsey . com 03 / 07 / 2001 cc : 10 : 21 am subject : re : dr . bernard loyd at mckinsey ( agriculture ) hi , thanks for your prompt response . my colleagues would indeed like to chat with you about the agriculture industry some time in the future . can we touch bases in a few weeks . in the mean time , do you have any materials you can forward to us about mckinsey ' s agriculture group ? thanks , iris - - - - - original message - - - - - from : @ enron sent : wednesday , march 07 , 2001 2 : 17 am to : mack , iris subject : re : iris mack at enron hey iris , great to hear from you and welcome back stateside ! i would be delighted to meet with you and your colleagues . bernard margot tyler 03 / 06 / 2001 to : bernard 02 : 41 pm loyd / chi / northamerica / mckinsey cc : subject : re : iris mack at enron - - - - - forwarded by margot tyler / chi / northamerica / mckinsey on 03 / 06 / 2001 02 : 41 pm - - - - - to : margot _ tyler @ mckinsey . com 03 / 06 / 2001 cc : 02 : 27 pm subject : re : message for bernard hi again , i had lunch today with some of the guys in my group who work on agriculture - related deals and on weather derivatives . i mentioned to them about bernard ' s working at mckinsey and specializing in the agriculture area . we thought it might be worthwhile if we all had a chat and / or met to discuss possible collaborative efforts . 
will you please forward this email on to bernard to see if this might be of interest to him ? thanks , iris | this message may contain confidential and / or privileged | | information . if you are not the addressee or authorized to | | receive this for the addressee , you must not use , copy , | | disclose or take any action based on this message or any | | information herein . if you have received this message in | | error , please advise the sender immediately by reply e - mail | | and delete this message . thank you for your cooperation . | | this message may contain confidential and / or privileged | | information . if you are not the addressee or authorized to | | receive this for the addressee , you must not use , copy , | | disclose or take any action based on this message or any | | information herein . if you have received this message in | | error , please advise the sender immediately by reply e - mail | | and delete this message . thank you for your cooperation . | - afc qual pack . zip >
## 5442 Subject: please note that the date for the lst meeting is january 16 shirley , please put it on my calendar . vince - - - - - - - - - - - - - - - - - - - - - - forwarded by vince j kaminski / hou / ect on 01 / 12 / 2001 02 : 54 pm - - - - - - - - - - - - - - - - - - - - - - - - - - - jennifer burns 01 / 12 / 2001 12 : 46 pm to : phillip k allen / hou / ect @ ect , john arnold / hou / ect @ ect , michael w bradley / hou / ect @ ect , jennifer fraser / hou / ect @ ect , mike grigsby / hou / ect @ ect , adam gross / hou / ect @ ect , rogers herndon / hou / ect @ ect , john j lavorato / corp / enron @ enron , kevin mcgowan / corp / enron @ enron , vince j kaminski / hou / ect @ ect , john l nowlan / hou / ect @ ect , kevin m presto / hou / ect @ ect , fletcher j sturm / hou / ect @ ect , hunter s shively / hou / ect @ ect , bill white / na / enron @ enron cc : jeffrey a shankman / hou / ect @ ect , gary hickerson / hou / ect @ ect subject : please note that the date for the lst meeting is january 16 as mentioned during the fourth quarter , gary and i would like to begin regular meetings of our trader ' s roundtable . the ideas generated from this group should be longer term trading opportunities for enron covering the markets we manage . in addition , this forum will provide for cross commodity education , insight into many areas of enron ' s businesses , and promote aggressive ideas . each week , we ' ll summarize commodity trading activity , and provide an open forum for discussion . your input is valuable , and we ' ve limited this group to our most experienced traders , and would appreciate regular participation . our first meeting will be tuesday , january 16 at 4 : 00 pm in eb 3321 .
## 5556 Subject: an interesting resume i shall interview this candidate next week ( a very preliminary interview ) to evaluate his potential and determine if he fits enron ' s culture . do you see a need for a person with his skills in your area ( litigation support ) ? vince
## spam
## 773 1
## 1449 0
## 3065 0
## 3315 0
## 5442 0
## 5556 0
## text
## 5722 Subject: ees risk management presentations for october 25 . please have your presentations to me by 10 am friday , october 20 , 2000 . it takes a couple of days to get the materials back from the copy center . the presentations have to be put in new binders this time and it takes most of a day to put everything together in the binders . therefore , i have to have the materials to the copy center by friday afternoon in order to get them back by monday afternoon or tuesday morning . i will need most of tuesday to get everything assembled in the binders . thus the necessity of having your completed presentations by friday morning . vince : basket options binary ( digital ) options krishna : barrier options other complex structures thanks for your help on this . regards , anita
## 5723 Subject: re : vacation vince : i just found out that it is friday , april 7 and not friday , march 31 st that i want to take for vacation . is this alright ? thanks ! shirley vince j kaminski 03 / 08 / 2000 06 : 18 pm to : shirley crenshaw / hou / ect @ ect cc : subject : re : vacation shirley , no problem . vince shirley crenshaw 03 / 08 / 2000 03 : 56 pm to : vince j kaminski / hou / ect @ ect cc : kevin g moore / hou / ect @ ect , william smith / corp / enron @ enron subject : vacation vince : i would like to take the following days as vacation : wednesday , march 15 th friday , march 31 st . please let me know if this is ok with you . thanks ! shirley
## 5724 Subject: re : research and development charges to gpg here it is ! - - - - - - - - - - - - - - - - - - - - - - forwarded by shirley crenshaw / hou / ect on 08 / 14 / 2000 07 : 47 am - - - - - - - - - - - - - - - - - - - - - - - - - - - vince j kaminski 08 / 10 / 2000 02 : 25 pm to : vera apodaca / et & s / enron @ enron cc : vince j kaminski / hou / ect @ ect , shirley crenshaw / hou / ect @ ect , pinnamaneni krishnarao / hou / ect @ ect subject : re : research and development charges to gpg vera , we shall talk to the accounting group about the correction . vince 08 / 09 / 2000 03 : 26 pm vera apodaca @ enron vera apodaca @ enron vera apodaca @ enron 08 / 09 / 2000 03 : 26 pm 08 / 09 / 2000 03 : 26 pm to : pinnamaneni krishnarao / hou / ect @ ect cc : vince j kaminski / hou / ect @ ect subject : research and development charges to gpg per mail dated june 15 from kim watson , there was supposed to have occurred a true - up of $ 274 . 7 in july for the fist six months of 2000 . reviewing july actuals , i was not able to locate this entry . would you pls let me know whether this entry was made , if not , when do you intend to process it . thanks .
## 5725 Subject: re : receipts from visit jim , thanks again for the invitation to visit lsu . shirley will fedex the receipts tomorrow . vince " james r . garven " on 02 / 08 / 2000 07 : 00 : 50 pm to : vince j kaminski cc : subject : receipts from visit dear vince , thanks again for taking the time to visit . ? both faculty and students got a lot out of your presentations . i have a favor to ask concerning the expense reimbursement process . ? can you mail all travel and lodging receipts to my secretary joan payne at the following address : joan payne department of finance 2163 ceba louisiana state university baton rouge , la ? 70803 thanks , jim garven james r . garven william h . wright , jr . endowed chair for financial services department of finance 2158 ceba e . j . ourso college of business administration louisiana state university baton rouge , la ? 70803 - 6308 voice ( 225 ) 388 - 0477 ? | ? fax : ( 800 ) 859 - 6361 e - mail : ? jgarven @ lsu . edu home page : http : / / garven . lsu . edu vita : http : / / garven . lsu . edu / dossier . html research paper archive : http : / / garven . lsu . edu / research . html
## 5726 Subject: re : enron case study update wow ! all on the same day . that ' s super . thank you so very much . vince is coming up to baylor on monday of next week and we will hash out our question list then . thanks john at 04 : 54 pm 11 / 6 / 00 - 0600 , you wrote : > good afternoon john , > > i just want to drop you a line to update you re : andy fastow . i have > confirmed a one hour interview slot with mr . fastow in monday , december 4 th > from > 11 : 00 a . m . - noon . this is in addition to your schedule interviews with > mr . lay and mr . skilling - outline below . > > if you have any questions , please do not hesitate to contact me at > 713 - 853 - 5670 . > > regards , > > cindy > > > - - - - - forwarded by cindy derecskey / corp / enron on 11 / 06 / 2000 04 : 49 pm - - - - - > > cindy > derecskey to : " john martin " > cc : vince j kaminski / hou / ect @ ect , christie patrick / hou / ect @ ect > 10 / 31 / 2000 subject : re : enron case study ( document link : cindy derecskey ) > 01 : 44 pm > > > > > > good afternoon john , > > i hope things are well with you . i am writing to update you on the status > of your meetings with andy fastow , ken lay and jeff skilling . i have > arranged the following meeting dates and times with ken lay and jeff > skilling , ( i am still trying to work with andy fastow ' s schedule ) : > > jeff skilling > december 4 th > 2 : 00 - 3 : 00 p . m . > > ken lay > december 4 th > 3 : 30 - 4 : 30 p . m . > > also , i will attempt to schedule the meeting with andy fastow for december > 4 th for convenience - this will also allow us to possibly schedule > additional meetings for the 5 th ( as needed ) . i will let you know as soon > as i ' m successful . > > regards , > > cindy derecskey > university affairs > enron corp . > > > > > john d . martin carr p . collins chair in finance finance department baylor university po box 98004 waco , tx 76798 254 - 710 - 4473 ( office ) 254 - 710 - 1092 ( fax ) j _ martin @ baylor . 
edu web : http : / / hsb . baylor . edu / html / martinj / home . html
## 5728 Subject: news : aurora 5 . 2 update aurora version 5 . 2 - the fastest model just got faster - epis announces the release of aurora , version 5 . 2 aurora the electric market price forecasting tool is already legendary for power and speed . we ' ve combined a powerful chronological dispatch model with the capability to simulate the market from 1 day to 25 + years . add to that a risk analysis section , powered by user selectable monte carlo & / or latin hypercube modeling , enough portfolio analysis power to please the toughest critic , & inputs and outputs from standard excel & access tables and you ' ve got one of most powerful tools in the market . just a few months ago we expanded our emissions modeling capabilities , added our quarterly database update , increased the speed of the entire model , and made but that wasn ' t enough . we ' ve done it again . some of the operations that we ' ve included . . . two new reporting enhancements . the first is marginal reporting for fuels , resources and groups of resources . the second is the ability to display resource stack information in graphical and dispatch order form . other enhancements include dual fuel modeling , improved transmission modeling , greater access to hourly results , and the ability to model monthly emission rates . moreover , the databases for central and eastern , texas , and western markets have been updated to use the new modeling capabilities . we continue to make aurora easier to use . this version enhances user control over modeling , editing inputs , and viewing of aurora output . clients desiring to exploit the power of aurora now have greater control over the inputs and outputs through vb scripting in aurora . the new " update data " capability provides a means to universally change any data element . attached is more information on the fastest and most flexible tool of its kind . for additional information , please visit our website ( www . epis . 
com ) or contact our sales department at ( 503 ) 722 - 2023 . ask about our special 7 - day demo ! v . todd wheeler sales manager epis , inc . ( 503 ) 722 - 2023 tel . ( 503 ) 722 - 7130 fax www . epis . com todd @ epis . com > > - what ' s new - version 5 . 2 information . doc - technical information aurora v 5 - 2 . doc
## spam
## 5722 0
## 5723 0
## 5724 0
## 5725 0
## 5726 0
## 5728 0
## 'data.frame': 4010 obs. of 2 variables:
## $ text: chr "Subject: naturally irresistible your corporate identity lt is really hard to recollect a company : the market is full of suqg"| __truncated__ "Subject: unbelievable new homes made easy im wanting to show you this homeowner you have been pre - approved for a $ 454 , 1"| __truncated__ "Subject: great nnews hello , welcome to medzonline sh groundsel op we are pleased to introduce ourselves as one of the ieadin"| __truncated__ "Subject: here ' s a hot play in motion homeland security investments the terror attacks on the united states on september 11 "| __truncated__ ...
## $ spam: int 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "comment")= chr "glb_trnent_df"
# Optionally cap the training set: when glb_max_trnent_obs is set and the
# training frame has more rows than that, keep only the rows picked by
# sample.split() on the raw response column and warn about the restriction.
if (!is.null(glb_max_trnent_obs) &&
    (nrow(glb_trnent_df) > glb_max_trnent_obs)) {
    warning("glb_trnent_df restricted to glb_max_trnent_obs: ",
            format(glb_max_trnent_obs, big.mark=","))
    org_entity_df <- glb_trnent_df
    # the row mask is kept in the global `split`, as before
    split <- sample.split(org_entity_df[, glb_rsp_var_raw],
                          SplitRatio=glb_max_trnent_obs)
    glb_trnent_df <- org_entity_df[split, ]
    org_entity_df <- NULL
}
# Template for applying the same cap to glb_newent_df (disabled):
#     if (nrow(glb_newent_df) > glb_max_obs) {
#         warning("glb_newent_df restricted to glb_max_obs: ", format(glb_max_obs, big.mark=","))
#         org_newent_df <- glb_newent_df
#         glb_newent_df <- org_newent_df[split <-
#             sample.split(org_newent_df[, glb_rsp_var_raw], SplitRatio=glb_max_obs), ]
#         org_newent_df <- NULL
#     }
# Sanity checks on the partitions: warn when the training or the new-data
# frame has as many rows as the full entity frame (i.e. nothing was held out).
if (nrow(glb_trnent_df) == nrow(glb_entity_df))
    warning("glb_trnent_df same as glb_entity_df")
if (nrow(glb_newent_df) == nrow(glb_entity_df))
    warning("glb_newent_df same as glb_entity_df")

# Remove the columns listed in glb_drop_vars from all three frames.
# drop=FALSE keeps the result a data.frame even when a single column survives;
# without it `[` would silently collapse the frame to a bare vector.
if (length(glb_drop_vars) > 0) {
    warning("dropping vars: ", paste0(glb_drop_vars, collapse=", "))
    glb_entity_df <- glb_entity_df[, setdiff(names(glb_entity_df), glb_drop_vars),
                                   drop=FALSE]
    glb_trnent_df <- glb_trnent_df[, setdiff(names(glb_trnent_df), glb_drop_vars),
                                   drop=FALSE]
    glb_newent_df <- glb_newent_df[, setdiff(names(glb_newent_df), glb_drop_vars),
                                   drop=FALSE]
}
# Log the start of the "cleanse_data" chunk in glb_script_df: the major step
# number is one more than the current maximum, the minor step restarts at 0,
# and `elapsed` is the "elapsed" component of proc.time() minus glb_script_tm
# (presumably a proc.time() snapshot taken at script start - confirm).
glb_script_df <- rbind(glb_script_df,
data.frame(chunk_label="cleanse_data",
chunk_step_major=max(glb_script_df$chunk_step_major)+1,
chunk_step_minor=0,
elapsed=(proc.time() - glb_script_tm)["elapsed"]))
# Show the two most recent log rows
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed import_data 1 0 0.002
## elapsed1 cleanse_data 2 0 2.315
2: cleanse data
glb_script_df <- rbind(glb_script_df,
data.frame(chunk_label="inspectORexplore.data",
chunk_step_major=max(glb_script_df$chunk_step_major),
chunk_step_minor=1,
elapsed=(proc.time() - glb_script_tm)["elapsed"]))
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed1 cleanse_data 2 0 2.315
## elapsed2 inspectORexplore.data 2 1 2.428
2.1: inspect/explore data
#print(str(glb_trnent_df))
#View(glb_trnent_df)
# List info gathered for various columns
# <col_name>: <description>; <notes>
# Create new features that help diagnostics
# Create factors of string variables
# Every character column of glb_trnent_df that is not already excluded as a
# feature gets a companion "<col>.fctr" column in all three frames, and the
# raw string column itself is then added to glb_exclude_vars_as_features.
# is.character() replaces the class(x) == "character" test, which yields a
# vector (and a ragged sapply result) for columns carrying several classes;
# vapply() over the columns also copes with a frame that has no columns.
str_vars <- names(glb_trnent_df)[vapply(glb_trnent_df, is.character, logical(1))]
str_vars <- setdiff(str_vars[str_vars != ""], glb_exclude_vars_as_features)
if (length(str_vars) > 0) {
    warning("Creating factors of string variables:", paste0(str_vars, collapse=", "))
    glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, str_vars)
    for (var in str_vars) {
        # Levels always come from the full entity frame so that the training
        # and new-data factors share one identical level set
        var_lvls <- as.factor(unique(glb_entity_df[, var]))
        glb_entity_df[, paste0(var, ".fctr")] <- factor(glb_entity_df[, var], var_lvls)
        glb_trnent_df[, paste0(var, ".fctr")] <- factor(glb_trnent_df[, var], var_lvls)
        glb_newent_df[, paste0(var, ".fctr")] <- factor(glb_newent_df[, var], var_lvls)
    }
}
# Convert factors to dummy variables
# Build splines require(splines); bsBasis <- bs(training$age, df=3)
# Add diagnostic feature columns to a data frame and print a quick profile.
#
# Args:
#   obs_df: data frame to augment.
#   ref_df: defaults to glb_entity_df; not referenced anywhere in the body.
#
# Returns:
#   obs_df with a `.rnorm` column appended (one rnorm() draw per row).
#
# Side effects: attaches plyr via require(), consumes random numbers, prints
# summary(obs_df) and the per-column count of NA values.
# The commented lines inside the mutate() call are templates for further
# derived columns; only `.rnorm` is active.
add_new_diag_feats <- function(obs_df, ref_df=glb_entity_df) {
require(plyr)
obs_df <- mutate(obs_df,
# <col_name>.NA=is.na(<col_name>),
# <col_name>.fctr=factor(<col_name>,
# as.factor(union(obs_df$<col_name>, obs_twin_df$<col_name>))),
# <col_name>.fctr=relevel(factor(<col_name>,
# as.factor(union(obs_df$<col_name>, obs_twin_df$<col_name>))),
# "<ref_val>"),
# <col2_name>.fctr=relevel(factor(ifelse(<col1_name> == <val>, "<oth_val>", "<ref_val>")),
# as.factor(c("R", "<ref_val>")),
# ref="<ref_val>"),
# This doesn't work - use sapply instead
# <col_name>.fctr_num=grep(<col_name>, levels(<col_name>.fctr)),
#
# Date.my=as.Date(strptime(Date, "%m/%d/%y %H:%M")),
# Year=year(Date.my),
# Month=months(Date.my),
# Weekday=weekdays(Date.my)
# <col_name>.log=log(1 + <col.name>),
# <col_name>=<table>[as.character(<col2_name>)],
# <col_name>=as.numeric(<col2_name>),
# random-noise column, one draw per row of obs_df
.rnorm=rnorm(n=nrow(obs_df))
)
# If levels of a factor are different across obs_df & glb_newent_df; predict.glm fails
# Transformations not handled by mutate
# obs_df$<col_name>.fctr.num <- sapply(1:nrow(obs_df),
# function(row_ix) grep(obs_df[row_ix, "<col_name>"],
# levels(obs_df[row_ix, "<col_name>.fctr"])))
# Profile the augmented frame: column summaries, then NA count per column
print(summary(obs_df))
print(sapply(names(obs_df), function(col) sum(is.na(obs_df[, col]))))
return(obs_df)
}
glb_entity_df <- add_new_diag_feats(glb_entity_df)
## Loading required package: plyr
## text spam .rnorm
## Length:5728 Min. :0.0000 Min. :-3.845320
## Class :character 1st Qu.:0.0000 1st Qu.:-0.667545
## Mode :character Median :0.0000 Median :-0.002274
## Mean :0.2388 Mean : 0.001124
## 3rd Qu.:0.0000 3rd Qu.: 0.688075
## Max. :1.0000 Max. : 3.847768
## text spam .rnorm
## 0 0 0
glb_trnent_df <- add_new_diag_feats(glb_trnent_df)
## text spam .rnorm
## Length:4010 Min. :0.0000 Min. :-3.40202
## Class :character 1st Qu.:0.0000 1st Qu.:-0.67952
## Mode :character Median :0.0000 Median :-0.02885
## Mean :0.2389 Mean :-0.01302
## 3rd Qu.:0.0000 3rd Qu.: 0.68406
## Max. :1.0000 Max. : 3.06750
## text spam .rnorm
## 0 0 0
glb_newent_df <- add_new_diag_feats(glb_newent_df)
## text spam .rnorm
## Length:1718 Min. :0.0000 Min. :-3.34690
## Class :character 1st Qu.:0.0000 1st Qu.:-0.63544
## Mode :character Median :0.0000 Median : 0.02066
## Mean :0.2386 Mean : 0.01033
## 3rd Qu.:0.0000 3rd Qu.: 0.68257
## Max. :1.0000 Max. : 3.10242
## text spam .rnorm
## 0 0 0
# Histogram of the raw response in glb_trnent_df & glb_newent_df, one facet
# per partition. The "New" rows must come from glb_newent_df; building both
# halves from glb_trnent_df made the two facets identical by construction.
plot_df <- rbind(cbind(glb_trnent_df[, glb_rsp_var_raw, FALSE],
                       data.frame(.data="Training")),
                 cbind(glb_newent_df[, glb_rsp_var_raw, FALSE],
                       data.frame(.data="New")))
print(myplot_histogram(plot_df, glb_rsp_var_raw) + facet_wrap(~ .data))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# used later in encode.retype.data chunk
# Print, for the training and the new-data partitions, the share of rows that
# falls in each value of `var`.
#
# Args:
#   var: name of a column present in both glb_trnent_df and glb_newent_df.
#
# Returns (invisibly, via print) the row-normalised cross-tab with one row per
# partition ("New", "Training").
#
# The "New" rows are taken from glb_newent_df; previously both halves were
# built from glb_trnent_df, so the two printed rows were always identical.
glb_display_class_dstrb <- function(var) {
    plot_df <- rbind(cbind(glb_trnent_df[, var, FALSE],
                           data.frame(.data="Training")),
                     cbind(glb_newent_df[, var, FALSE],
                           data.frame(.data="New")))
    xtab_df <- mycreate_xtab(plot_df, c(".data", var))
    rownames(xtab_df) <- xtab_df$.data
    xtab_df <- subset(xtab_df, select=-.data)
    # NOTE(review): mycreate_xtab presumably leaves NA for a class that is
    # absent from one partition; treat that as a zero count so the row shares
    # stay defined - confirm against mydsutils.R
    xtab_df[is.na(xtab_df)] <- 0
    print(xtab_df / rowSums(xtab_df))
}
if (glb_is_classification) glb_display_class_dstrb(glb_rsp_var_raw)
## Loading required package: reshape2
## spam.0 spam.1
## New 0.7610973 0.2389027
## Training 0.7610973 0.2389027
# Check for duplicates in glb_id_vars
# Tabulate the id columns of glb_entity_df (or its row names when no id
# columns are configured) and keep the entries that occur more than once.
if (length(glb_id_vars) == 0) {
    tmp_entity_df <- glb_entity_df
    tmp_entity_df$.rownames <- rownames(tmp_entity_df)
    id_vars_df <- mycreate_tbl_df(tmp_entity_df[, ".rownames", FALSE], ".rownames")
} else {
    id_vars_df <- mycreate_tbl_df(glb_entity_df[, glb_id_vars, FALSE], glb_id_vars)
}
id_vars_dups_df <- subset(id_vars_df, .freq > 1)

if (nrow(id_vars_dups_df) == 0) {
    # glb_id_vars are unique across obs in both glb_<>_df
    glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, glb_id_vars)
} else {
    warning("Duplicates found in glb_id_vars data:", nrow(id_vars_dups_df))
    myprint_df(id_vars_dups_df)
}
#pairs(subset(glb_trnent_df, select=-c(col_symbol)))
# Check for glb_newent_df & glb_trnent_df features range mismatches
# Other diagnostics:
# print(subset(glb_trnent_df, <col1_name> == max(glb_trnent_df$<col1_name>, na.rm=TRUE) &
# <col2_name> <= mean(glb_trnent_df$<col1_name>, na.rm=TRUE)))
# print(glb_trnent_df[which.max(glb_trnent_df$<col_name>),])
# print(<col_name>_freq_glb_trnent_df <- mycreate_tbl_df(glb_trnent_df, "<col_name>"))
# print(which.min(table(glb_trnent_df$<col_name>)))
# print(which.max(table(glb_trnent_df$<col_name>)))
# print(which.max(table(glb_trnent_df$<col1_name>, glb_trnent_df$<col2_name>)[, 2]))
# print(table(glb_trnent_df$<col1_name>, glb_trnent_df$<col2_name>))
# print(table(is.na(glb_trnent_df$<col1_name>), glb_trnent_df$<col2_name>))
# print(table(sign(glb_trnent_df$<col1_name>), glb_trnent_df$<col2_name>))
# print(mycreate_xtab(glb_trnent_df, <col1_name>))
# print(mycreate_xtab(glb_trnent_df, c(<col1_name>, <col2_name>)))
# print(<col1_name>_<col2_name>_xtab_glb_trnent_df <-
# mycreate_xtab(glb_trnent_df, c("<col1_name>", "<col2_name>")))
# <col1_name>_<col2_name>_xtab_glb_trnent_df[is.na(<col1_name>_<col2_name>_xtab_glb_trnent_df)] <- 0
# print(<col1_name>_<col2_name>_xtab_glb_trnent_df <-
# mutate(<col1_name>_<col2_name>_xtab_glb_trnent_df,
# <col3_name>=(<col1_name> * 1.0) / (<col1_name> + <col2_name>)))
# print(<col2_name>_min_entity_arr <-
# sort(tapply(glb_trnent_df$<col1_name>, glb_trnent_df$<col2_name>, min, na.rm=TRUE)))
# print(<col1_name>_na_by_<col2_name>_arr <-
# sort(tapply(glb_trnent_df$<col1_name>.NA, glb_trnent_df$<col2_name>, mean, na.rm=TRUE)))
# Other plots:
# print(myplot_box(df=glb_trnent_df, ycol_names="<col1_name>"))
# print(myplot_box(df=glb_trnent_df, ycol_names="<col1_name>", xcol_name="<col2_name>"))
# print(myplot_line(subset(glb_trnent_df, Symbol %in% c("KO", "PG")),
# "Date.my", "StockPrice", facet_row_colnames="Symbol") +
# geom_vline(xintercept=as.numeric(as.Date("2003-03-01"))) +
# geom_vline(xintercept=as.numeric(as.Date("1983-01-01")))
# )
# print(myplot_scatter(glb_entity_df, "<col1_name>", "<col2_name>", smooth=TRUE))
# print(myplot_scatter(glb_entity_df, "<col1_name>", "<col2_name>", colorcol_name="<Pred.fctr>") +
# geom_point(data=subset(glb_entity_df, <condition>),
# mapping=aes(x=<x_var>, y=<y_var>), color="red", shape=4, size=5))
# Log the start of the "manage_missing_data" chunk in glb_script_df: same
# major step as the current maximum, minor step = minor step of the last
# logged row + 1, `elapsed` = "elapsed" component of proc.time() minus
# glb_script_tm.
glb_script_df <- rbind(glb_script_df,
data.frame(chunk_label="manage_missing_data",
chunk_step_major=max(glb_script_df$chunk_step_major),
chunk_step_minor=glb_script_df[nrow(glb_script_df), "chunk_step_minor"]+1,
elapsed=(proc.time() - glb_script_tm)["elapsed"]))
# Show the two most recent log rows
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed2 inspectORexplore.data 2 1 2.428
## elapsed3 manage_missing_data 2 2 4.095
2.2: manage missing data
# print(sapply(names(glb_trnent_df), function(col) sum(is.na(glb_trnent_df[, col]))))
# print(sapply(names(glb_newent_df), function(col) sum(is.na(glb_newent_df[, col]))))
# glb_trnent_df <- na.omit(glb_trnent_df)
# glb_newent_df <- na.omit(glb_newent_df)
# df[is.na(df)] <- 0
# Not refactored into mydsutils.R since glb_*_df might be reassigned
# Impute missing feature values with mice over the union of both partitions
# and write the imputed partitions back to the globals glb_trnent_df and
# glb_newent_df.
#
# Args:
#   entity_df: training partition (rows are tagged "entity" internally).
#   newent_df: new-data partition (rows are tagged "newent" internally).
#
# Only columns of entity_df that are neither glb_rsp_var nor listed in
# glb_exclude_vars_as_features are imputed. The random seed is set from
# glb_mice_complete.seed. Stops with "Not implemented yet" when
# glb_is_separate_newent_dataset is TRUE.
#
# Returns (invisibly, via print) the per-column NA counts of the imputed
# new-data partition.
#
# Fixes relative to the earlier version:
#   * the partition step dropped a `.rownames` column that is never created,
#     which made subset() fail; helper columns are now dropped only if present
#   * comment() was applied after `<<-`, i.e. to a function-local copy; it is
#     now set before the global assignment
#   * the closing summaries described the un-imputed arguments
#   * single-column selections no longer collapse to a vector
glb_impute_missing_data <- function(entity_df, newent_df) {
    if (glb_is_separate_newent_dataset) stop("Not implemented yet")

    impute_vars <- setdiff(setdiff(names(entity_df), glb_rsp_var),
                           glb_exclude_vars_as_features)

    # Combine entity & newent
    entity_src_df <- entity_df
    entity_src_df$.src <- "entity"
    newent_src_df <- newent_df
    newent_src_df$.src <- "newent"
    union_df <- rbind(entity_src_df, newent_src_df)

    union_imputed_df <- union_df[, impute_vars, drop=FALSE]
    print(summary(union_imputed_df))

    if (!require(mice)) stop("package 'mice' is required for imputation")
    set.seed(glb_mice_complete.seed)
    union_imputed_df <- complete(mice(union_imputed_df))
    print(summary(union_imputed_df))

    union_df[, names(union_imputed_df)] <- union_imputed_df[, names(union_imputed_df)]
    print(summary(union_df))

#     union_df$.rownames <- rownames(union_df)
#     union_df <- orderBy(~.rownames, union_df)
#
#     imp_entity_df <- myimport_data(
#         url="<imputed_trnng_url>",
#         comment="imp_entity_df", force_header=TRUE, print_diagn=TRUE)
#     print(all.equal(subset(union_df, select=-c(.src, .rownames, .rnorm)),
#                     imp_entity_df))

    # Partition again, dropping the helper columns that actually exist
    keep_cols <- setdiff(names(union_df), c(".src", ".rownames"))
    imp_entity_df <- union_df[union_df$.src == "entity", keep_cols, drop=FALSE]
    comment(imp_entity_df) <- "entity_df"
    glb_trnent_df <<- imp_entity_df
    imp_newent_df <- union_df[union_df$.src == "newent", keep_cols, drop=FALSE]
    comment(imp_newent_df) <- "newent_df"
    glb_newent_df <<- imp_newent_df

    # Generate summaries of the imputed partitions
    print(summary(imp_entity_df))
    print(sapply(names(imp_entity_df),
                 function(col) sum(is.na(imp_entity_df[, col]))))
    print(summary(imp_newent_df))
    print(sapply(names(imp_newent_df),
                 function(col) sum(is.na(imp_newent_df[, col]))))
}
# Run the imputation only when it is switched on AND at least one of the two
# partitions actually contains a missing value. `&&`/`||` are the scalar,
# short-circuiting operators meant for `if`; any(is.na(df)) also works for a
# frame without columns, where summing an empty sapply() result fails.
if (glb_impute_na_data &&
    (any(is.na(glb_trnent_df)) || any(is.na(glb_newent_df)))) {
    glb_impute_missing_data(glb_trnent_df, glb_newent_df)
}
# Log the start of the "encodeORretype.data" chunk in glb_script_df: same
# major step as the current maximum, minor step = minor step of the last
# logged row + 1, `elapsed` = "elapsed" component of proc.time() minus
# glb_script_tm.
glb_script_df <- rbind(glb_script_df,
data.frame(chunk_label="encodeORretype.data",
chunk_step_major=max(glb_script_df$chunk_step_major),
chunk_step_minor=glb_script_df[nrow(glb_script_df), "chunk_step_minor"]+1,
elapsed=(proc.time() - glb_script_tm)["elapsed"]))
# Show the two most recent log rows
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed3 manage_missing_data 2 2 4.095
## elapsed4 encodeORretype.data 2 3 4.383
2.3: encode/retype data
# map_<col_name>_df <- myimport_data(
# url="<map_url>",
# comment="map_<col_name>_df", print_diagn=TRUE)
# map_<col_name>_df <- read.csv(paste0(getwd(), "/data/<file_name>.csv"), strip.white=TRUE)
# glb_trnent_df <- mymap_codes(glb_trnent_df, "<from_col_name>", "<to_col_name>",
# map_<to_col_name>_df, map_join_col_name="<map_join_col_name>",
# map_tgt_col_name="<to_col_name>")
# glb_newent_df <- mymap_codes(glb_newent_df, "<from_col_name>", "<to_col_name>",
# map_<to_col_name>_df, map_join_col_name="<map_join_col_name>",
# map_tgt_col_name="<to_col_name>")
# glb_trnent_df$<col_name>.fctr <- factor(glb_trnent_df$<col_name>,
# as.factor(union(glb_trnent_df$<col_name>, glb_newent_df$<col_name>)))
# glb_newent_df$<col_name>.fctr <- factor(glb_newent_df$<col_name>,
# as.factor(union(glb_trnent_df$<col_name>, glb_newent_df$<col_name>)))
# When a raw-to-model response mapping is defined, derive the glb_rsp_var
# column from glb_rsp_var_raw in each of the three frames (entity, training,
# new), in that order, checking every mapping right after it is applied.
# NOTE(review): mycheck_map_results presumably prints a from/to frequency
# table (cf. the "spam spam.fctr .n" output) - confirm in mydsutils.R
if (!is.null(glb_map_rsp_raw_to_var)) {
glb_entity_df[, glb_rsp_var] <-
glb_map_rsp_raw_to_var(glb_entity_df[, glb_rsp_var_raw])
mycheck_map_results(mapd_df=glb_entity_df,
from_col_name=glb_rsp_var_raw, to_col_name=glb_rsp_var)
glb_trnent_df[, glb_rsp_var] <-
glb_map_rsp_raw_to_var(glb_trnent_df[, glb_rsp_var_raw])
mycheck_map_results(mapd_df=glb_trnent_df,
from_col_name=glb_rsp_var_raw, to_col_name=glb_rsp_var)
glb_newent_df[, glb_rsp_var] <-
glb_map_rsp_raw_to_var(glb_newent_df[, glb_rsp_var_raw])
mycheck_map_results(mapd_df=glb_newent_df,
from_col_name=glb_rsp_var_raw, to_col_name=glb_rsp_var)
# For classification problems, also print the class shares of the mapped response
if (glb_is_classification) glb_display_class_dstrb(glb_rsp_var)
}
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
## Loading required package: tcltk
## spam spam.fctr .n
## 1 0 N 4360
## 2 1 Y 1368
## spam spam.fctr .n
## 1 0 N 3052
## 2 1 Y 958
## spam spam.fctr .n
## 1 0 N 1308
## 2 1 Y 410
## spam.fctr.N spam.fctr.Y
## New 0.7610973 0.2389027
## Training 0.7610973 0.2389027
# Log the start of the "extract.features" chunk in glb_script_df: the major
# step number is one more than the current maximum, the minor step restarts
# at 0, `elapsed` = "elapsed" component of proc.time() minus glb_script_tm.
glb_script_df <- rbind(glb_script_df,
data.frame(chunk_label="extract.features",
chunk_step_major=max(glb_script_df$chunk_step_major)+1,
chunk_step_minor=0,
elapsed=(proc.time() - glb_script_tm)["elapsed"]))
# Show the two most recent log rows
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed4 encodeORretype.data 2 3 4.383
## elapsed5 extract.features 3 0 7.282
3: extract features
#```{r extract_features, cache=FALSE, eval=glb_is_textual}
# Create new features that help prediction
# <col_name>.lag.2 <- lag(zoo(glb_trnent_df$<col_name>), -2, na.pad=TRUE)
# glb_trnent_df[, "<col_name>.lag.2"] <- coredata(<col_name>.lag.2)
# <col_name>.lag.2 <- lag(zoo(glb_newent_df$<col_name>), -2, na.pad=TRUE)
# glb_newent_df[, "<col_name>.lag.2"] <- coredata(<col_name>.lag.2)
#
# glb_newent_df[1, "<col_name>.lag.2"] <- glb_trnent_df[nrow(glb_trnent_df) - 1,
# "<col_name>"]
# glb_newent_df[2, "<col_name>.lag.2"] <- glb_trnent_df[nrow(glb_trnent_df),
# "<col_name>"]
# glb_entity_df <- mutate(glb_entity_df,
# A.has.http=ifelse(grepl("http",Added,fixed=TRUE), 1, 0)
# )
#
# glb_trnent_df <- mutate(glb_trnent_df,
# )
#
# glb_newent_df <- mutate(glb_newent_df,
# )
# Text feature extraction. For every column in glb_txt_vars:
#   1. build a tm corpus from glb_entity_df (lower-cased, punctuation and
#      stop words removed, stemmed) and its full / sparse document-term
#      matrices, plotting the term frequencies;
#   2. bind the sparse DTM columns to glb_entity_df, prefixed with the
#      upper-cased first letter of the text column, plus the derived
#      <pfx>.has.http, <pfx>.num.chars, <pfx>.num.words, <pfx>.num.words.unq
#      columns and log(1 + x) versions of the three count columns;
# then re-derive glb_trnent_df / glb_newent_df from the augmented entity
# frame by joining on glb_id_vars (or on row names when no ids are set).
if (glb_is_textual) {
    require(tm)

    glb_corpus_lst <- list(); glb_full_DTM_lst <- list(); glb_sprs_DTM_lst <- list();

    for (txt_var in glb_txt_vars) {
        print(sprintf("Building corpus for %s...", txt_var))

        # NOTE(review): tolower followed by PlainTextDocument looks like the
        # usual workaround for tm_map() with a plain character function;
        # confirm it still yields one document per row with the installed tm
        txt_corpus <- Corpus(VectorSource(glb_entity_df[, txt_var]))
        txt_corpus <- tm_map(txt_corpus, tolower)
        txt_corpus <- tm_map(txt_corpus, PlainTextDocument)
        txt_corpus <- tm_map(txt_corpus, removePunctuation)
        txt_corpus <- tm_map(txt_corpus, removeWords,
                             c(glb_append_stop_words, stopwords("english")))
        txt_corpus <- tm_map(txt_corpus, stemDocument)

        # Term totals over all documents, most frequent first
        full_freqs_DTM <- DocumentTermMatrix(txt_corpus)
        print(" Full freqs:"); print(full_freqs_DTM)
        full_freqs_vctr <- colSums(as.matrix(full_freqs_DTM))
        names(full_freqs_vctr) <- dimnames(full_freqs_DTM)[[2]]
        full_freqs_df <- as.data.frame(full_freqs_vctr)
        names(full_freqs_df) <- "freq.full"
        full_freqs_df$term <- rownames(full_freqs_df)
        full_freqs_df <- orderBy(~ -freq.full, full_freqs_df)

        # Same totals restricted to the terms kept by removeSparseTerms()
        sprs_freqs_DTM <- removeSparseTerms(full_freqs_DTM, glb_sprs_threshold)
        print(" Sparse freqs:"); print(sprs_freqs_DTM)
        sprs_freqs_vctr <- colSums(as.matrix(sprs_freqs_DTM))
        names(sprs_freqs_vctr) <- dimnames(sprs_freqs_DTM)[[2]]
        sprs_freqs_df <- as.data.frame(sprs_freqs_vctr)
        names(sprs_freqs_df) <- "freq.sprs"
        sprs_freqs_df$term <- rownames(sprs_freqs_df)
        sprs_freqs_df <- orderBy(~ -freq.sprs, sprs_freqs_df)

        # freq.sprs is NA for the terms that were dropped as too sparse
        terms_freqs_df <- merge(full_freqs_df, sprs_freqs_df, all.x=TRUE)
        melt_freqs_df <- orderBy(~ -value, melt(terms_freqs_df, id.var="term"))
        print(ggplot(melt_freqs_df, aes(value, color=variable)) + stat_ecdf() +
                  geom_hline(yintercept=glb_sprs_threshold, linetype = "dotted"))
        print(myplot_hbar(head(melt_freqs_df, 20), "term", "value",
                          colorcol_name="variable"))
        # Most frequent terms among those that were dropped
        melt_freqs_df <- orderBy(~ -value,
                        melt(subset(terms_freqs_df, is.na(freq.sprs)), id.var="term"))
        print(myplot_hbar(head(melt_freqs_df, 10), "term", "value",
                          colorcol_name="variable"))

        glb_corpus_lst[[txt_var]] <- txt_corpus
        glb_full_DTM_lst[[txt_var]] <- full_freqs_DTM
        glb_sprs_DTM_lst[[txt_var]] <- sprs_freqs_DTM
    }

    # Create txt features
    # Column prefixes are the upper-cased first letters of the text column
    # names, so two text columns must not start with the same letter
    pfxs <- toupper(substr(glb_txt_vars, 1, 1))
    if ((length(glb_txt_vars) > 1) && (anyDuplicated(pfxs) > 0))
        stop("Prefixes for corpus freq terms not unique: ", pfxs)

    for (txt_var in glb_txt_vars) {
        print(sprintf("Binding DTM for %s...", txt_var))
        txt_pfx <- toupper(substr(txt_var, 1, 1))

        txt_X_df <- as.data.frame(as.matrix(glb_sprs_DTM_lst[[txt_var]]))
        colnames(txt_X_df) <- paste0(txt_pfx, ".", make.names(colnames(txt_X_df)))
        rownames(txt_X_df) <- rownames(glb_entity_df) # warning otherwise
        glb_entity_df <- cbind(glb_entity_df, txt_X_df)

        # grepl() and nchar() are vectorised, so the per-row sapply() loops
        # are unnecessary (and 1:nrow() misbehaves on an empty frame)
        txt_vctr <- glb_entity_df[, txt_var]

        # Create <txt_var>.has.http
        glb_entity_df[, paste0(txt_pfx, ".has.http")] <-
            ifelse(grepl("http", txt_vctr, fixed=TRUE), 1, 0)

        # Create <txt_var>.num.chars
        glb_entity_df[, paste0(txt_pfx, ".num.chars")] <- nchar(txt_vctr)

        # Create <txt_var>.num.words & .num.words.unq
        # Densify the full DTM once instead of once per derived column
        full_mtrx <- as.matrix(glb_full_DTM_lst[[txt_var]])
        glb_entity_df[, paste0(txt_pfx, ".num.words")] <- rowSums(full_mtrx)
        glb_entity_df[, paste0(txt_pfx, ".num.words.unq")] <- rowSums(full_mtrx != 0)
        rm(full_mtrx)

        for (feat in paste0(txt_pfx,
                            c(".num.chars", ".num.words", ".num.words.unq"))) {
            glb_entity_df[, paste0(feat, ".log")] <- log(1 + glb_entity_df[, feat])
            print(myplot_box(glb_entity_df, paste0(feat, ".log"), glb_rsp_var))
        }
    }

    # a working copy of this is reqd in manage.missingdata chunk
    union_df <- rbind(mutate(glb_trnent_df, .src = "trnent"),
                      mutate(glb_newent_df, .src = "newent"))
    tmp_entity_df <- glb_entity_df
    # Plain if/else: ifelse() returns a result shaped like its (length-one)
    # test and therefore kept only the first of several id columns
    mrg_id_vars <- if (length(glb_id_vars) > 0) glb_id_vars else ".rownames"
    mrg_on_rownames <- identical(mrg_id_vars, ".rownames")
    if (mrg_on_rownames) {
        union_df$.rownames <- rownames(union_df)
        tmp_entity_df$.rownames <- rownames(tmp_entity_df)
    }
    # NOTE(review): merge() sorts on the key and renumbers the rows, so the
    # partitions below do not keep the original row order / row names
    mrg_entity_df <- merge(tmp_entity_df, union_df[, c(".src", mrg_id_vars)],
                           by=mrg_id_vars)

    # Partition again
    glb_trnent_df <- subset(mrg_entity_df, .src == "trnent", select=-c(.src))
    glb_newent_df <- subset(mrg_entity_df, .src == "newent", select=-c(.src))
    if (mrg_on_rownames) {
        glb_trnent_df <- subset(glb_trnent_df, select=-c(.rownames))
        glb_newent_df <- subset(glb_newent_df, select=-c(.rownames))
    }
    comment(glb_trnent_df) <- "trnent_df"
    comment(glb_newent_df) <- "newent_df"

    # Generate summaries
#     print(summary(glb_entity_df))
#     print(sapply(names(glb_entity_df), function(col) sum(is.na(glb_entity_df[, col]))))
#     print(summary(glb_trnent_df))
#     print(sapply(names(glb_trnent_df), function(col) sum(is.na(glb_trnent_df[, col]))))
#     print(summary(glb_newent_df))
#     print(sapply(names(glb_newent_df), function(col) sum(is.na(glb_newent_df[, col]))))
}
## Loading required package: tm
## Loading required package: NLP
##
## Attaching package: 'NLP'
##
## The following object is masked from 'package:ggplot2':
##
## annotate
## [1] "Building corpus for text..."
## [1] " Full freqs:"
## <<DocumentTermMatrix (documents: 5728, terms: 28687)>>
## Non-/sparse entries: 481719/163837417
## Sparsity : 100%
## Maximal term length: 24
## Weighting : term frequency (tf)
## [1] " Sparse freqs:"
## <<DocumentTermMatrix (documents: 5728, terms: 330)>>
## Non-/sparse entries: 213551/1676689
## Sparsity : 89%
## Maximal term length: 10
## Weighting : term frequency (tf)
## Warning: Removed 6 rows containing missing values (geom_path).
## [1] "Binding DTM for text..."
# print(sapply(names(glb_trnent_df), function(col) sum(is.na(glb_trnent_df[, col]))))
# print(sapply(names(glb_newent_df), function(col) sum(is.na(glb_newent_df[, col]))))
# print(myplot_scatter(glb_trnent_df, "<col1_name>", "<col2_name>", smooth=TRUE))
# Mark the training and new-data objects as available, then replay the
# analytics petri net with the updated availability list.
# The list is updated in its own statement rather than inside the argument
#   list: function arguments are evaluated lazily, so an assignment buried in
#   an argument only takes effect if and when the callee uses that argument.
glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
                            "data.training.all", "data.new")
replay.petrisim(pn=glb_analytics_pn,
                replay.trans=glb_analytics_avl_objs, flip_coord=TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
# Log the start of the "select_features" chunk: it opens a new major step
# (previous maximum + 1), minor step 0, stamped with the seconds elapsed
# since glb_script_tm.
glb_script_df <- local({
    # named length-1 vector; its name ("elapsed") seeds the new row's name
    elapsed_now <- (proc.time() - glb_script_tm)["elapsed"]
    next_major <- max(glb_script_df$chunk_step_major) + 1
    chunk_row <- data.frame(chunk_label="select_features",
                            chunk_step_major=next_major,
                            chunk_step_minor=0,
                            elapsed=elapsed_now)
    rbind(glb_script_df, chunk_row)
})
# Show the previous and the newly added log entries
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed5 extract.features 3 0 7.282
## elapsed6 select_features 4 0 54.204
4: select features
print(glb_feats_df <- myselect_features(entity_df=glb_trnent_df,
exclude_vars_as_features=glb_exclude_vars_as_features,
rsp_var=glb_rsp_var))
## id cor.y exclude.as.feat
## spam spam 1.0000000000 1
## T.vinc T.vinc -0.3181138032 0
## T.thank T.thank -0.2949866270 0
## T.subject T.subject -0.2742522973 0
## T.click T.click 0.2657893248 0
## T.kaminski T.kaminski -0.2650520320 0
## T.life T.life 0.2542534429 0
## T.remov T.remov 0.2431172421 0
## T.X2000 T.X2000 -0.2329015803 0
## T.websit T.websit 0.2205915225 0
## T.let T.let -0.1984382892 0
## T.money T.money 0.1950121271 0
## T.forward T.forward -0.1933717118 0
## T.pleas T.pleas -0.1909556548 0
## T.enron T.enron -0.1864662681 0
## T.research T.research -0.1841301335 0
## T.ect T.ect -0.1787183496 0
## T.regard T.regard -0.1784745678 0
## T.X2001 T.X2001 -0.1765484637 0
## T.now T.now 0.1759665053 0
## T.onlin T.onlin 0.1733740078 0
## T.attach T.attach -0.1712313186 0
## T.receiv T.receiv 0.1687818268 0
## T.hope T.hope -0.1662362379 0
## T.know T.know -0.1658411125 0
## T.softwar T.softwar 0.1612232752 0
## T.hou T.hou -0.1588378340 0
## T.invest T.invest 0.1563160859 0
## T.without T.without 0.1559808594 0
## T.secur T.secur 0.1548974786 0
## T.has.http T.has.http 0.1546324286 0
## T.discuss T.discuss -0.1535472399 0
## T.group T.group -0.1533114490 0
## T.shirley T.shirley -0.1510181510 0
## T.X713 T.X713 -0.1495101398 0
## T.offer T.offer 0.1487070000 0
## T.schedul T.schedul -0.1465912289 0
## T.num.chars.log T.num.chars.log -0.1455685879 0
## T.special T.special 0.1448517517 0
## T.num.words.log T.num.words.log -0.1440768533 0
## T.free T.free 0.1417992288 0
## T.talk T.talk -0.1416300933 0
## T.stinson T.stinson -0.1414714170 0
## T.meet T.meet -0.1411273291 0
## T.within T.within 0.1374174643 0
## T.question T.question -0.1341087114 0
## T.account T.account 0.1313479737 0
## T.crenshaw T.crenshaw -0.1311650803 0
## T.gibner T.gibner -0.1296610131 0
## T.houston T.houston -0.1293812505 0
## T.net T.net 0.1258019309 0
## T.just T.just 0.1252238480 0
## T.model T.model -0.1247536864 0
## T.compani T.compani 0.1240836965 0
## T.vkamin T.vkamin -0.1235350771 0
## T.interview T.interview -0.1231062091 0
## T.shall T.shall -0.1226099197 0
## T.appreci T.appreci -0.1221464744 0
## T.univers T.univers -0.1212534115 0
## T.monday T.monday -0.1194585534 0
## T.wish T.wish 0.1186807642 0
## T.custom T.custom 0.1164366855 0
## T.morn T.morn -0.1164137393 0
## T.corp T.corp -0.1147582983 0
## T.attend T.attend -0.1144674018 0
## T.john T.john -0.1140403295 0
## T.X853 T.X853 -0.1140335522 0
## T.thursday T.thursday -0.1127452699 0
## T.resum T.resum -0.1121691760 0
## T.friday T.friday -0.1120976417 0
## T.busi T.busi 0.1095075205 0
## T.suggest T.suggest -0.1094172581 0
## T.mail T.mail 0.1085842197 0
## T.doc T.doc -0.1083611032 0
## T.right T.right 0.1060564074 0
## T.financ T.financ -0.1045890279 0
## T.energi T.energi -0.1044570231 0
## T.ask T.ask -0.1036818687 0
## T.manag T.manag -0.1036145841 0
## T.futur T.futur 0.1027319286 0
## T.email T.email 0.1023923165 0
## T.arrang T.arrang -0.1018207213 0
## T.site T.site 0.1014465328 0
## T.wednesday T.wednesday -0.1010425980 0
## T.address T.address 0.0996708734 0
## T.edu T.edu -0.0982918252 0
## T.home T.home 0.0973272169 0
## T.dear T.dear -0.0965980343 0
## T.internet T.internet 0.0963431994 0
## T.confer T.confer -0.0954039390 0
## T.option T.option -0.0947335835 0
## T.call T.call -0.0943589896 0
## T.X000 T.X000 0.0940159920 0
## T.effect T.effect 0.0935998097 0
## T.student T.student -0.0933439568 0
## T.robert T.robert -0.0928419819 0
## T.kevin T.kevin -0.0927983452 0
## T.london T.london -0.0927913079 0
## T.school T.school -0.0924453207 0
## T.week T.week -0.0916969807 0
## T.possibl T.possibl -0.0915695518 0
## T.list T.list 0.0912739361 0
## T.analysi T.analysi -0.0910457873 0
## T.think T.think -0.0906628209 0
## T.depart T.depart -0.0904778162 0
## T.invit T.invit -0.0890043574 0
## T.num.words.unq.log T.num.words.unq.log -0.0889207444 0
## T.like T.like -0.0887544511 0
## T.risk T.risk -0.0881542117 0
## T.april T.april -0.0876138572 0
## T.made T.made 0.0873106432 0
## T.request T.request -0.0858771771 0
## T.success T.success 0.0850351303 0
## T.last T.last -0.0846935116 0
## T.soon T.soon -0.0838069893 0
## T.mark T.mark -0.0837776089 0
## T.order T.order 0.0833470697 0
## T.tuesday T.tuesday -0.0830155913 0
## T.result T.result 0.0830096996 0
## T.set T.set -0.0828285961 0
## T.also T.also -0.0820622159 0
## T.date T.date -0.0812755078 0
## T.buy T.buy 0.0808969779 0
## T.link T.link 0.0801420776 0
## T.http T.http 0.0800627311 0
## T.happi T.happi -0.0797444804 0
## T.present T.present -0.0794303423 0
## T.deriv T.deriv -0.0786592130 0
## T.team T.team -0.0779983734 0
## T.begin T.begin -0.0771229704 0
## T.design T.design 0.0766707755 0
## T.togeth T.togeth -0.0761858529 0
## T.sent T.sent -0.0746954965 0
## T.issu T.issu -0.0742893516 0
## T.peopl T.peopl 0.0739160630 0
## T.data T.data -0.0738487730 0
## T.mention T.mention -0.0720983856 0
## T.develop T.develop -0.0720388395 0
## T.product T.product 0.0715115284 0
## T.howev T.howev -0.0712701234 0
## T.understand T.understand -0.0710122486 0
## T.next. T.next. -0.0702153197 0
## T.repli T.repli 0.0700943046 0
## T.contact T.contact -0.0699343534 0
## T.problem T.problem -0.0697427037 0
## T.origin T.origin -0.0692795506 0
## T.work T.work -0.0682900073 0
## T.line T.line 0.0679815330 0
## T.thing T.thing 0.0676896421 0
## T.fax T.fax -0.0670894817 0
## T.run T.run -0.0669971544 0
## T.inform T.inform 0.0666736466 0
## T.will T.will -0.0665079616 0
## T.confirm T.confirm -0.0664106329 0
## T.still T.still -0.0663399134 0
## T.info T.info 0.0663165694 0
## T.etc T.etc -0.0652946470 0
## T.particip T.particip -0.0649046742 0
## T.associ T.associ -0.0644852670 0
## T.copi T.copi -0.0643112761 0
## T.comment T.comment -0.0642596100 0
## T.hear T.hear -0.0642102696 0
## T.offic T.offic -0.0638067201 0
## T.immedi T.immedi 0.0636714595 0
## T.make T.make 0.0631366852 0
## T.way T.way 0.0626661827 0
## T.book T.book -0.0625010987 0
## T.thought T.thought -0.0624397814 0
## T.report T.report 0.0623679487 0
## T.note T.note -0.0619902255 0
## T.start T.start 0.0616916942 0
## T.point T.point -0.0602478720 0
## T.two T.two -0.0598421384 0
## T.help T.help -0.0597389247 0
## T.director T.director -0.0597083651 0
## T.phone T.phone -0.0584657986 0
## T.abl T.abl -0.0579660702 0
## T.believ T.believ 0.0577847153 0
## T.web T.web 0.0577720345 0
## T.direct T.direct -0.0576243342 0
## T.back T.back -0.0569490315 0
## T.provid T.provid 0.0569426417 0
## T.follow T.follow -0.0566942671 0
## T.juli T.juli -0.0541972695 0
## T.power T.power -0.0535301754 0
## T.mean T.mean 0.0535012931 0
## T.give T.give -0.0534532759 0
## T.join T.join -0.0533575547 0
## T.support T.support -0.0530429277 0
## T.read T.read 0.0528329213 0
## T.num.words T.num.words -0.0527500900 0
## T.num.chars T.num.chars -0.0524196291 0
## T.well T.well -0.0523933425 0
## T.need T.need -0.0523199878 0
## T.final T.final -0.0520091641 0
## T.one T.one 0.0506678105 0
## T.communic T.communic -0.0500034930 0
## T.either T.either -0.0499022910 0
## T.expect T.expect 0.0497864062 0
## T.updat T.updat -0.0496413250 0
## T.gas T.gas -0.0495248812 0
## T.respond T.respond -0.0493495900 0
## T.per T.per 0.0492077392 0
## T.full T.full 0.0490145812 0
## T.case T.case -0.0487433346 0
## T.return T.return 0.0478946173 0
## T.feel T.feel -0.0474010255 0
## T.version T.version 0.0472326491 0
## T.idea T.idea -0.0471329213 0
## T.trade T.trade -0.0467818091 0
## T.open T.open -0.0465715601 0
## T.high T.high 0.0464722890 0
## T.process T.process -0.0461567802 0
## T.might T.might -0.0460962123 0
## T.get T.get 0.0457042259 0
## T.detail T.detail -0.0444871076 0
## T.addit T.addit -0.0437829267 0
## T.com T.com -0.0433966800 0
## T.project T.project -0.0432261317 0
## T.time T.time -0.0431450343 0
## T.year T.year -0.0430454013 0
## T.don T.don 0.0428995876 0
## T.end T.end -0.0420623769 0
## T.sincer T.sincer -0.0414940836 0
## T.respons T.respons -0.0410279129 0
## T.come T.come -0.0410008395 0
## T.contract T.contract -0.0405180394 0
## T.www T.www 0.0399480693 0
## T.best T.best 0.0398959074 0
## T.sinc T.sinc -0.0396341699 0
## T.today T.today 0.0394257429 0
## T.cours T.cours -0.0391356944 0
## T.can T.can -0.0390445514 0
## T.continu T.continu -0.0383716340 0
## T.name T.name 0.0383030252 0
## T.anoth T.anoth -0.0378074765 0
## T.locat T.locat -0.0377448379 0
## T.send T.send 0.0376573942 0
## T.even T.even 0.0375582942 0
## T.review T.review -0.0367880188 0
## T.mani T.mani 0.0362883851 0
## T.sorri T.sorri -0.0362540720 0
## T.posit T.posit -0.0360326427 0
## T.put T.put -0.0359657007 0
## T.move T.move -0.0359061954 0
## T.price T.price -0.0351068190 0
## T.experi T.experi -0.0350513391 0
## T.involv T.involv 0.0347124963 0
## T.given T.given -0.0346627918 0
## T.num.words.unq T.num.words.unq -0.0345410286 0
## T.deal T.deal -0.0343820752 0
## T.current T.current -0.0342745848 0
## T.area T.area -0.0340797023 0
## T.form T.form 0.0338707071 0
## T.servic T.servic 0.0337403188 0
## T.bring T.bring -0.0331450379 0
## T.system T.system 0.0324103996 0
## T.want T.want 0.0323104205 0
## T.increas T.increas 0.0309706690 0
## T.look T.look -0.0309057968 0
## T.opportun T.opportun -0.0307220593 0
## T.approv T.approv -0.0307077438 0
## T.creat T.creat 0.0301405208 0
## T.resourc T.resourc -0.0292356177 0
## T.relat T.relat -0.0288844112 0
## T.credit T.credit -0.0287077376 0
## T.visit T.visit -0.0286237098 0
## T.hour T.hour 0.0281047902 0
## T.corpor T.corpor 0.0280793191 0
## T.check T.check 0.0279556976 0
## T.good T.good -0.0272279489 0
## T.say T.say 0.0262260668 0
## T.engin T.engin 0.0253306210 0
## T.alreadi T.alreadi -0.0250216840 0
## T.long T.long -0.0249203970 0
## T.specif T.specif -0.0246339979 0
## T.differ T.differ -0.0243752924 0
## T.great T.great -0.0236211947 0
## T.number T.number -0.0234857598 0
## T.hello T.hello 0.0233464644 0
## T.intern T.intern -0.0232839939 0
## T.member T.member -0.0224540197 0
## T.event T.event -0.0215873205 0
## T.plan T.plan -0.0215226695 0
## T.keep T.keep 0.0213379127 0
## T.person T.person 0.0213046754 0
## T.may T.may -0.0209343125 0
## T.avail T.avail 0.0203738484 0
## T.chang T.chang -0.0200456328 0
## T.recent T.recent -0.0199602773 0
## T.see T.see -0.0196578866 0
## T.file T.file -0.0192972102 0
## T.better T.better -0.0191663387 0
## T.part T.part -0.0184082049 0
## T.unit T.unit -0.0183074565 0
## T.month T.month 0.0181070117 0
## T.real T.real 0.0176738908 0
## T.program T.program 0.0175949191 0
## T.interest T.interest -0.0169701603 0
## T.short T.short -0.0169549005 0
## T.write T.write 0.0163589185 0
## T.place T.place 0.0162785786 0
## T.term T.term -0.0157925005 0
## T.access T.access -0.0155755615 0
## .rnorm .rnorm -0.0149916451 0
## T.import T.import 0.0145290721 0
## T.new T.new -0.0132368059 0
## T.valu T.valu -0.0130686578 0
## T.complet T.complet 0.0127091694 0
## T.sure T.sure -0.0124904057 0
## T.public T.public -0.0123208718 0
## T.find T.find -0.0121708040 0
## T.state T.state 0.0113101183 0
## T.type T.type 0.0109271706 0
## T.done T.done 0.0108049574 0
## T.rate T.rate 0.0107974523 0
## T.much T.much -0.0103939917 0
## T.cost T.cost -0.0094544027 0
## T.financi T.financi 0.0093179100 0
## T.includ T.includ 0.0091397492 0
## T.industri T.industri 0.0086920687 0
## T.lot T.lot 0.0081198806 0
## T.first T.first -0.0077751788 0
## T.oper T.oper -0.0074123474 0
## T.sever T.sever 0.0065969188 0
## T.use T.use -0.0063128080 0
## T.base T.base 0.0062517784 0
## T.market T.market -0.0058950194 0
## T.applic T.applic 0.0050352482 0
## T.tri T.tri -0.0038171490 0
## T.effort T.effort -0.0034438302 0
## T.due T.due 0.0034232838 0
## T.allow T.allow -0.0030796408 0
## T.take T.take -0.0023729634 0
## T.realli T.realli 0.0019275089 0
## T.day T.day 0.0007891443 0
## T.assist T.assist -0.0006189100 0
## T.messag T.messag -0.0005575180 0
## T.requir T.requir 0.0004346765 0
## cor.y.abs
## spam 1.0000000000
## T.vinc 0.3181138032
## T.thank 0.2949866270
## T.subject 0.2742522973
## T.click 0.2657893248
## T.kaminski 0.2650520320
## T.life 0.2542534429
## T.remov 0.2431172421
## T.X2000 0.2329015803
## T.websit 0.2205915225
## T.let 0.1984382892
## T.money 0.1950121271
## T.forward 0.1933717118
## T.pleas 0.1909556548
## T.enron 0.1864662681
## T.research 0.1841301335
## T.ect 0.1787183496
## T.regard 0.1784745678
## T.X2001 0.1765484637
## T.now 0.1759665053
## T.onlin 0.1733740078
## T.attach 0.1712313186
## T.receiv 0.1687818268
## T.hope 0.1662362379
## T.know 0.1658411125
## T.softwar 0.1612232752
## T.hou 0.1588378340
## T.invest 0.1563160859
## T.without 0.1559808594
## T.secur 0.1548974786
## T.has.http 0.1546324286
## T.discuss 0.1535472399
## T.group 0.1533114490
## T.shirley 0.1510181510
## T.X713 0.1495101398
## T.offer 0.1487070000
## T.schedul 0.1465912289
## T.num.chars.log 0.1455685879
## T.special 0.1448517517
## T.num.words.log 0.1440768533
## T.free 0.1417992288
## T.talk 0.1416300933
## T.stinson 0.1414714170
## T.meet 0.1411273291
## T.within 0.1374174643
## T.question 0.1341087114
## T.account 0.1313479737
## T.crenshaw 0.1311650803
## T.gibner 0.1296610131
## T.houston 0.1293812505
## T.net 0.1258019309
## T.just 0.1252238480
## T.model 0.1247536864
## T.compani 0.1240836965
## T.vkamin 0.1235350771
## T.interview 0.1231062091
## T.shall 0.1226099197
## T.appreci 0.1221464744
## T.univers 0.1212534115
## T.monday 0.1194585534
## T.wish 0.1186807642
## T.custom 0.1164366855
## T.morn 0.1164137393
## T.corp 0.1147582983
## T.attend 0.1144674018
## T.john 0.1140403295
## T.X853 0.1140335522
## T.thursday 0.1127452699
## T.resum 0.1121691760
## T.friday 0.1120976417
## T.busi 0.1095075205
## T.suggest 0.1094172581
## T.mail 0.1085842197
## T.doc 0.1083611032
## T.right 0.1060564074
## T.financ 0.1045890279
## T.energi 0.1044570231
## T.ask 0.1036818687
## T.manag 0.1036145841
## T.futur 0.1027319286
## T.email 0.1023923165
## T.arrang 0.1018207213
## T.site 0.1014465328
## T.wednesday 0.1010425980
## T.address 0.0996708734
## T.edu 0.0982918252
## T.home 0.0973272169
## T.dear 0.0965980343
## T.internet 0.0963431994
## T.confer 0.0954039390
## T.option 0.0947335835
## T.call 0.0943589896
## T.X000 0.0940159920
## T.effect 0.0935998097
## T.student 0.0933439568
## T.robert 0.0928419819
## T.kevin 0.0927983452
## T.london 0.0927913079
## T.school 0.0924453207
## T.week 0.0916969807
## T.possibl 0.0915695518
## T.list 0.0912739361
## T.analysi 0.0910457873
## T.think 0.0906628209
## T.depart 0.0904778162
## T.invit 0.0890043574
## T.num.words.unq.log 0.0889207444
## T.like 0.0887544511
## T.risk 0.0881542117
## T.april 0.0876138572
## T.made 0.0873106432
## T.request 0.0858771771
## T.success 0.0850351303
## T.last 0.0846935116
## T.soon 0.0838069893
## T.mark 0.0837776089
## T.order 0.0833470697
## T.tuesday 0.0830155913
## T.result 0.0830096996
## T.set 0.0828285961
## T.also 0.0820622159
## T.date 0.0812755078
## T.buy 0.0808969779
## T.link 0.0801420776
## T.http 0.0800627311
## T.happi 0.0797444804
## T.present 0.0794303423
## T.deriv 0.0786592130
## T.team 0.0779983734
## T.begin 0.0771229704
## T.design 0.0766707755
## T.togeth 0.0761858529
## T.sent 0.0746954965
## T.issu 0.0742893516
## T.peopl 0.0739160630
## T.data 0.0738487730
## T.mention 0.0720983856
## T.develop 0.0720388395
## T.product 0.0715115284
## T.howev 0.0712701234
## T.understand 0.0710122486
## T.next. 0.0702153197
## T.repli 0.0700943046
## T.contact 0.0699343534
## T.problem 0.0697427037
## T.origin 0.0692795506
## T.work 0.0682900073
## T.line 0.0679815330
## T.thing 0.0676896421
## T.fax 0.0670894817
## T.run 0.0669971544
## T.inform 0.0666736466
## T.will 0.0665079616
## T.confirm 0.0664106329
## T.still 0.0663399134
## T.info 0.0663165694
## T.etc 0.0652946470
## T.particip 0.0649046742
## T.associ 0.0644852670
## T.copi 0.0643112761
## T.comment 0.0642596100
## T.hear 0.0642102696
## T.offic 0.0638067201
## T.immedi 0.0636714595
## T.make 0.0631366852
## T.way 0.0626661827
## T.book 0.0625010987
## T.thought 0.0624397814
## T.report 0.0623679487
## T.note 0.0619902255
## T.start 0.0616916942
## T.point 0.0602478720
## T.two 0.0598421384
## T.help 0.0597389247
## T.director 0.0597083651
## T.phone 0.0584657986
## T.abl 0.0579660702
## T.believ 0.0577847153
## T.web 0.0577720345
## T.direct 0.0576243342
## T.back 0.0569490315
## T.provid 0.0569426417
## T.follow 0.0566942671
## T.juli 0.0541972695
## T.power 0.0535301754
## T.mean 0.0535012931
## T.give 0.0534532759
## T.join 0.0533575547
## T.support 0.0530429277
## T.read 0.0528329213
## T.num.words 0.0527500900
## T.num.chars 0.0524196291
## T.well 0.0523933425
## T.need 0.0523199878
## T.final 0.0520091641
## T.one 0.0506678105
## T.communic 0.0500034930
## T.either 0.0499022910
## T.expect 0.0497864062
## T.updat 0.0496413250
## T.gas 0.0495248812
## T.respond 0.0493495900
## T.per 0.0492077392
## T.full 0.0490145812
## T.case 0.0487433346
## T.return 0.0478946173
## T.feel 0.0474010255
## T.version 0.0472326491
## T.idea 0.0471329213
## T.trade 0.0467818091
## T.open 0.0465715601
## T.high 0.0464722890
## T.process 0.0461567802
## T.might 0.0460962123
## T.get 0.0457042259
## T.detail 0.0444871076
## T.addit 0.0437829267
## T.com 0.0433966800
## T.project 0.0432261317
## T.time 0.0431450343
## T.year 0.0430454013
## T.don 0.0428995876
## T.end 0.0420623769
## T.sincer 0.0414940836
## T.respons 0.0410279129
## T.come 0.0410008395
## T.contract 0.0405180394
## T.www 0.0399480693
## T.best 0.0398959074
## T.sinc 0.0396341699
## T.today 0.0394257429
## T.cours 0.0391356944
## T.can 0.0390445514
## T.continu 0.0383716340
## T.name 0.0383030252
## T.anoth 0.0378074765
## T.locat 0.0377448379
## T.send 0.0376573942
## T.even 0.0375582942
## T.review 0.0367880188
## T.mani 0.0362883851
## T.sorri 0.0362540720
## T.posit 0.0360326427
## T.put 0.0359657007
## T.move 0.0359061954
## T.price 0.0351068190
## T.experi 0.0350513391
## T.involv 0.0347124963
## T.given 0.0346627918
## T.num.words.unq 0.0345410286
## T.deal 0.0343820752
## T.current 0.0342745848
## T.area 0.0340797023
## T.form 0.0338707071
## T.servic 0.0337403188
## T.bring 0.0331450379
## T.system 0.0324103996
## T.want 0.0323104205
## T.increas 0.0309706690
## T.look 0.0309057968
## T.opportun 0.0307220593
## T.approv 0.0307077438
## T.creat 0.0301405208
## T.resourc 0.0292356177
## T.relat 0.0288844112
## T.credit 0.0287077376
## T.visit 0.0286237098
## T.hour 0.0281047902
## T.corpor 0.0280793191
## T.check 0.0279556976
## T.good 0.0272279489
## T.say 0.0262260668
## T.engin 0.0253306210
## T.alreadi 0.0250216840
## T.long 0.0249203970
## T.specif 0.0246339979
## T.differ 0.0243752924
## T.great 0.0236211947
## T.number 0.0234857598
## T.hello 0.0233464644
## T.intern 0.0232839939
## T.member 0.0224540197
## T.event 0.0215873205
## T.plan 0.0215226695
## T.keep 0.0213379127
## T.person 0.0213046754
## T.may 0.0209343125
## T.avail 0.0203738484
## T.chang 0.0200456328
## T.recent 0.0199602773
## T.see 0.0196578866
## T.file 0.0192972102
## T.better 0.0191663387
## T.part 0.0184082049
## T.unit 0.0183074565
## T.month 0.0181070117
## T.real 0.0176738908
## T.program 0.0175949191
## T.interest 0.0169701603
## T.short 0.0169549005
## T.write 0.0163589185
## T.place 0.0162785786
## T.term 0.0157925005
## T.access 0.0155755615
## .rnorm 0.0149916451
## T.import 0.0145290721
## T.new 0.0132368059
## T.valu 0.0130686578
## T.complet 0.0127091694
## T.sure 0.0124904057
## T.public 0.0123208718
## T.find 0.0121708040
## T.state 0.0113101183
## T.type 0.0109271706
## T.done 0.0108049574
## T.rate 0.0107974523
## T.much 0.0103939917
## T.cost 0.0094544027
## T.financi 0.0093179100
## T.includ 0.0091397492
## T.industri 0.0086920687
## T.lot 0.0081198806
## T.first 0.0077751788
## T.oper 0.0074123474
## T.sever 0.0065969188
## T.use 0.0063128080
## T.base 0.0062517784
## T.market 0.0058950194
## T.applic 0.0050352482
## T.tri 0.0038171490
## T.effort 0.0034438302
## T.due 0.0034232838
## T.allow 0.0030796408
## T.take 0.0023729634
## T.realli 0.0019275089
## T.day 0.0007891443
## T.assist 0.0006189100
## T.messag 0.0005575180
## T.requir 0.0004346765
# Log the start of the "remove_correlated_features" chunk: it stays within
# the current major step and takes the minor step after the last logged one,
# stamped with the seconds elapsed since glb_script_tm.
glb_script_df <- local({
    # named length-1 vector; its name ("elapsed") seeds the new row's name
    elapsed_now <- (proc.time() - glb_script_tm)["elapsed"]
    this_major <- max(glb_script_df$chunk_step_major)
    next_minor <- glb_script_df[nrow(glb_script_df), "chunk_step_minor"] + 1
    chunk_row <- data.frame(chunk_label="remove_correlated_features",
                            chunk_step_major=this_major,
                            chunk_step_minor=next_minor,
                            elapsed=elapsed_now)
    rbind(glb_script_df, chunk_row)
})
# Show the previous and the newly added log entries
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor
## elapsed6 select_features 4 0
## elapsed7 remove_correlated_features 4 1
## elapsed
## elapsed6 54.204
## elapsed7 55.580
5: fit models
glb_models_lst <- list(); glb_models_df <- data.frame()
# Sanity checks before any model is fit.
# A binomial classifier needs both outcome classes in the training data
if (glb_is_classification && glb_is_binomial &&
    (length(unique(glb_trnent_df[, glb_rsp_var])) < 2)) {
    stop("glb_trnent_df$", glb_rsp_var, ": contains less than 2 unique values: ",
         paste0(unique(glb_trnent_df[, glb_rsp_var]), collapse=", "))
}
# Id of the eligible feature (not excluded, correlation not flagged as low)
# with the largest absolute correlation to the response
max_cor_y_x_var <- orderBy(~ -cor.y.abs,
    subset(glb_feats_df, (exclude.as.feat == 0) & !is.cor.y.abs.low))[1, "id"]
if (!is.null(glb_Baseline_mdl_var)) {
    # Scalar condition, hence && (short-circuits; & is the elementwise form).
    # The check fires when another feature is strictly MORE correlated with
    #   the response than the Baseline var, so the message says "higher";
    #   it previously said "lower", contradicting the condition.
    if ((max_cor_y_x_var != glb_Baseline_mdl_var) &&
        (glb_feats_df[max_cor_y_x_var, "cor.y.abs"] >
         glb_feats_df[glb_Baseline_mdl_var, "cor.y.abs"])) {
        stop(max_cor_y_x_var, " has a higher correlation with ", glb_rsp_var,
             " than the Baseline var: ", glb_Baseline_mdl_var)
    }
}
# Label for the kind of problem being modelled; passed as model_type to the
# myfit_mdl() calls further down
glb_model_type <- if (glb_is_regression) "regression" else "classification"
# Baseline
# Fit only when a Baseline variable has been configured
if (!is.null(glb_Baseline_mdl_var)) {
    # NOTE(review): this call goes through myfit_mdl_fn() and passes no
    #   model_type, unlike the MFO / Random fits which call myfit_mdl() --
    #   confirm that myfit_mdl_fn is still defined in the sourced utilities
    ret_lst <- myfit_mdl_fn(model_id="Baseline",
                            model_method="mybaseln_classfr",
                            indep_vars_vctr=glb_Baseline_mdl_var,
                            rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                            fit_df=glb_trnent_df, OOB_df=glb_newent_df)
}
# Most Frequent Outcome "MFO" model: mean(y) for regression
#   Not using caret's nullModel since model stats not avl
#   Cannot use rpart for multinomial classification since it predicts non-MFO
# The only predictor handed to the fit is the .rnorm column; training data is
#   used for fitting and the new-data partition for OOB evaluation
ret_lst <- myfit_mdl(
    model_id="MFO",
    model_method=if (glb_is_regression) "lm" else "myMFO_classfr",
    model_type=glb_model_type,
    indep_vars_vctr=".rnorm",
    rsp_var=glb_rsp_var,
    rsp_var_out=glb_rsp_var_out,
    fit_df=glb_trnent_df,
    OOB_df=glb_newent_df)
## [1] "fitting model: MFO.myMFO_classfr"
## [1] " indep_vars: .rnorm"
## Fitting parameter = none on full training set
## [1] "in MFO.Classifier$fit"
## [1] "unique.vals:"
## [1] N Y
## Levels: N Y
## [1] "unique.prob:"
## y
## N Y
## 0.7610973 0.2389027
## [1] "MFO.val:"
## [1] "N"
## Length Class Mode
## unique.vals 2 factor numeric
## unique.prob 2 -none- numeric
## MFO.val 1 -none- character
## x.names 1 -none- character
## xNames 1 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## Loading required package: ROCR
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
## [1] "in MFO.Classifier$predict"
## [1] "in MFO.Classifier$prob"
## N Y
## 1 0.7610973 0.2389027
## 2 0.7610973 0.2389027
## 3 0.7610973 0.2389027
## 4 0.7610973 0.2389027
## 5 0.7610973 0.2389027
## 6 0.7610973 0.2389027
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.MFO.myMFO_classfr.N
## 1 N 3052
## 2 Y 958
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.MFO.myMFO_classfr.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.MFO.myMFO_classfr.Y
## 1 0
## 2 0
## Prediction
## Reference N Y
## N 3052 0
## Y 958 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.610973e-01 0.000000e+00 7.475872e-01 7.742259e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 5.086718e-01 6.580650e-210
## [1] "in MFO.Classifier$predict"
## [1] "in MFO.Classifier$prob"
## N Y
## 1 0.7610973 0.2389027
## 2 0.7610973 0.2389027
## 3 0.7610973 0.2389027
## 4 0.7610973 0.2389027
## 5 0.7610973 0.2389027
## 6 0.7610973 0.2389027
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.MFO.myMFO_classfr.N
## 1 N 1308
## 2 Y 410
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.MFO.myMFO_classfr.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.MFO.myMFO_classfr.Y
## 1 0
## 2 0
## Prediction
## Reference N Y
## N 1308 0
## Y 410 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.613504e-01 0.000000e+00 7.404667e-01 7.813376e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 5.132530e-01 9.975777e-91
## model_id model_method feats max.nTuningRuns
## 1 MFO.myMFO_classfr myMFO_classfr .rnorm 0
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 0.368 0.003 0.5
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0 0.7610973
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7475872 0.7742259 0 0.5
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0 0.7613504
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7404667 0.7813376 0
# "random" model - only for classification;
#   none needed for regression since it is same as MFO
# Same predictor (.rnorm) and fit / OOB data as the MFO model
if (glb_is_classification) {
    ret_lst <- myfit_mdl(
        model_id="Random",
        model_method="myrandom_classfr",
        model_type=glb_model_type,
        indep_vars_vctr=".rnorm",
        rsp_var=glb_rsp_var,
        rsp_var_out=glb_rsp_var_out,
        fit_df=glb_trnent_df,
        OOB_df=glb_newent_df)
}
## [1] "fitting model: Random.myrandom_classfr"
## [1] " indep_vars: .rnorm"
## Fitting parameter = none on full training set
## Length Class Mode
## unique.vals 2 factor numeric
## unique.prob 2 table numeric
## xNames 1 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] "in Random.Classifier$prob"
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 2281 720
## Y 771 238
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 2281
## 2 Y 720
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 771
## 2 238
## Reference
## Prediction N Y
## N 2281 720
## Y 771 238
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 2281
## 2 Y 720
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 771
## 2 238
## Reference
## Prediction N Y
## N 2281 720
## Y 771 238
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 2281
## 2 Y 720
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 771
## 2 238
## Reference
## Prediction N Y
## N 2281 720
## Y 771 238
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 2281
## 2 Y 720
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 771
## 2 238
## Reference
## Prediction N Y
## N 2281 720
## Y 771 238
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 2281
## 2 Y 720
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 771
## 2 238
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.3856683
## 3 0.2 0.3856683
## 4 0.3 0.2419929
## 5 0.4 0.2419929
## 6 0.5 0.2419929
## 7 0.6 0.2419929
## 8 0.7 0.2419929
## 9 0.8 0.0000000
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.2000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.Y
## 1 N 3052
## 2 Y 958
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 3052
## 2 958
## Prediction
## Reference N Y
## N 0 3052
## Y 0 958
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2389027 0.0000000 0.2257741 0.2524128 0.7610973
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
## [1] "in Random.Classifier$prob"
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 970 300
## Y 338 110
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 970
## 2 Y 300
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 338
## 2 110
## Reference
## Prediction N Y
## N 970 300
## Y 338 110
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 970
## 2 Y 300
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 338
## 2 110
## Reference
## Prediction N Y
## N 970 300
## Y 338 110
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 970
## 2 Y 300
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 338
## 2 110
## Reference
## Prediction N Y
## N 970 300
## Y 338 110
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 970
## 2 Y 300
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 338
## 2 110
## Reference
## Prediction N Y
## N 970 300
## Y 338 110
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 970
## 2 Y 300
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 338
## 2 110
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.3853383
## 3 0.2 0.3853383
## 4 0.3 0.2564103
## 5 0.4 0.2564103
## 6 0.5 0.2564103
## 7 0.6 0.2564103
## 8 0.7 0.2564103
## 9 0.8 0.0000000
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.2000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.Y
## 1 N 1308
## 2 Y 410
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Random.myrandom_classfr.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Random.myrandom_classfr.Y
## 1 1308
## 2 410
## Prediction
## Reference N Y
## N 0 1308
## Y 0 410
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 2.386496e-01 0.000000e+00 2.186624e-01 2.595333e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 5.612725e-286
## model_id model_method feats max.nTuningRuns
## 1 Random.myrandom_classfr myrandom_classfr .rnorm 0
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 0.255 0.001 0.4979065
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.2 0.3856683 0.2389027
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.2257741 0.2524128 0 0.5049414
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.2 0.3853383 0.2386496
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.2186624 0.2595333 0
# Any model that has tuning parameters gets "better" results with cross-validation
# (except rf) & "different" results for different outcome metrics
# Max.cor.Y
# First of several fits on the feature(s) in max_cor_y_x_var, used to check the
# impact of cross-validation. This call passes neither n_cv_folds nor
# tune_models_df, so myfit_mdl's own defaults for both apply.
# Author's caveat: rpart is not a good candidate here since caret does not
# optimize cp (the only tuning parameter of rpart) well.
ret_lst <- myfit_mdl(
    model_id = "Max.cor.Y.cv.0",
    model_method = "rpart",
    model_type = glb_model_type,
    indep_vars_vctr = max_cor_y_x_var,
    rsp_var = glb_rsp_var,
    rsp_var_out = glb_rsp_var_out,
    fit_df = glb_trnent_df,
    OOB_df = glb_newent_df
)
## [1] "fitting model: Max.cor.Y.cv.0.rpart"
## [1] " indep_vars: T.vinc"
## Loading required package: rpart
## Fitting cp = 0 on full training set
## Loading required package: rpart.plot
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 4010
##
## CP nsplit rel error
## 1 0 0 1
##
## Node number 1: 4010 observations
## predicted class=N expected loss=0.2389027 P(node) =1
## class counts: 3052 958
## probabilities: 0.761 0.239
##
## n= 4010
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 4010 958 N (0.7610973 0.2389027) *
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Max.cor.Y.cv.0.rpart.N
## 1 N 3052
## 2 Y 958
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.cv.0.rpart.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.cv.0.rpart.Y
## 1 0
## 2 0
## Prediction
## Reference N Y
## N 3052 0
## Y 958 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.610973e-01 0.000000e+00 7.475872e-01 7.742259e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 5.086718e-01 6.580650e-210
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Max.cor.Y.cv.0.rpart.N
## 1 N 1308
## 2 Y 410
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.cv.0.rpart.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.cv.0.rpart.Y
## 1 0
## 2 0
## Prediction
## Reference N Y
## N 1308 0
## Y 410 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.613504e-01 0.000000e+00 7.404667e-01 7.813376e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 5.132530e-01 9.975777e-91
## model_id model_method feats max.nTuningRuns
## 1 Max.cor.Y.cv.0.rpart rpart T.vinc 0
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 0.629 0.042 0.5
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0 0.7610973
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7475872 0.7742259 0 0.5
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0 0.7613504
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7404667 0.7813376 0
# Same rpart fit as Max.cor.Y.cv.0, but with cross-validation explicitly
# disabled (n_cv_folds = 0) and the cp tuning range pinned to the single
# value 0 (min == max), so results can be compared against the default run.
ret_lst <- myfit_mdl(
    model_id = "Max.cor.Y.cv.0.cp.0",
    model_method = "rpart",
    model_type = glb_model_type,
    indep_vars_vctr = max_cor_y_x_var,
    rsp_var = glb_rsp_var,
    rsp_var_out = glb_rsp_var_out,
    fit_df = glb_trnent_df,
    OOB_df = glb_newent_df,
    n_cv_folds = 0,
    tune_models_df = data.frame(parameter = "cp", min = 0.0, max = 0.0, by = 0.1)
)
## [1] "fitting model: Max.cor.Y.cv.0.cp.0.rpart"
## [1] " indep_vars: T.vinc"
## Fitting cp = 0 on full training set
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 4010
##
## CP nsplit rel error
## 1 0 0 1
##
## Node number 1: 4010 observations
## predicted class=N expected loss=0.2389027 P(node) =1
## class counts: 3052 958
## probabilities: 0.761 0.239
##
## n= 4010
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 4010 958 N (0.7610973 0.2389027) *
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.N
## 1 N 3052
## 2 Y 958
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.Y
## 1 0
## 2 0
## Prediction
## Reference N Y
## N 3052 0
## Y 958 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.610973e-01 0.000000e+00 7.475872e-01 7.742259e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 5.086718e-01 6.580650e-210
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.N
## 1 N 1308
## 2 Y 410
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.Y
## 1 0
## 2 0
## Prediction
## Reference N Y
## N 1308 0
## Y 410 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.613504e-01 0.000000e+00 7.404667e-01 7.813376e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 5.132530e-01 9.975777e-91
## model_id model_method feats max.nTuningRuns
## 1 Max.cor.Y.cv.0.cp.0.rpart rpart T.vinc 0
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 0.498 0.043 0.5
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0 0.7610973
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7475872 0.7742259 0 0.5
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0 0.7613504
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7404667 0.7813376 0
# rpart fit with glb_n_cv_folds-fold cross-validation and no custom tuning grid.
# Only run for regression / binomial problems: for multinomials this model
# will be run next by default.
if (glb_is_regression || glb_is_binomial) {
    ret_lst <- myfit_mdl(
        model_id = "Max.cor.Y",
        model_method = "rpart",
        model_type = glb_model_type,
        indep_vars_vctr = max_cor_y_x_var,
        rsp_var = glb_rsp_var,
        rsp_var_out = glb_rsp_var_out,
        fit_df = glb_trnent_df,
        OOB_df = glb_newent_df,
        n_cv_folds = glb_n_cv_folds,
        tune_models_df = NULL
    )
}
## [1] "fitting model: Max.cor.Y.rpart"
## [1] " indep_vars: T.vinc"
## + Fold1: cp=0
## - Fold1: cp=0
## + Fold2: cp=0
## - Fold2: cp=0
## + Fold3: cp=0
## - Fold3: cp=0
## Aggregating results
## Fitting final model on full training set
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 4010
##
## CP nsplit rel error
## 1 0 0 1
##
## Node number 1: 4010 observations
## predicted class=N expected loss=0.2389027 P(node) =1
## class counts: 3052 958
## probabilities: 0.761 0.239
##
## n= 4010
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 4010 958 N (0.7610973 0.2389027) *
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Max.cor.Y.rpart.N
## 1 N 3052
## 2 Y 958
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.rpart.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.rpart.Y
## 1 0
## 2 0
## Prediction
## Reference N Y
## N 3052 0
## Y 958 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.610973e-01 0.000000e+00 7.475872e-01 7.742259e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 5.086718e-01 6.580650e-210
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Max.cor.Y.rpart.N
## 1 N 1308
## 2 Y 410
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.rpart.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.rpart.Y
## 1 0
## 2 0
## Prediction
## Reference N Y
## N 1308 0
## Y 410 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.613504e-01 0.000000e+00 7.404667e-01 7.813376e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 5.132530e-01 9.975777e-91
## model_id model_method feats max.nTuningRuns
## 1 Max.cor.Y.rpart rpart T.vinc 1
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 1.06 0.043 0.5
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0 0.7610974
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7475872 0.7742259 0 0.5
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0 0.7613504
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7404667 0.7813376 0
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.0001952373 0
# Used to compare vs. Interactions.High.cor.Y
# Fits the max_cor_y_x_var feature(s) with the default method for the problem
# type: "lm" for regression, "glm" for binomial classification, "rpart"
# otherwise. The method is chosen with scalar if/else rather than the
# vectorised ifelse(), so a malformed flag (NA or length > 1) raises an error
# here instead of silently passing a bad model_method to myfit_mdl.
ret_lst <- myfit_mdl(
    model_id = "Max.cor.Y",
    model_method = if (glb_is_regression) {
        "lm"
    } else if (glb_is_binomial) {
        "glm"
    } else {
        "rpart"
    },
    model_type = glb_model_type,
    indep_vars_vctr = max_cor_y_x_var,
    rsp_var = glb_rsp_var,
    rsp_var_out = glb_rsp_var_out,
    fit_df = glb_trnent_df,
    OOB_df = glb_newent_df,
    n_cv_folds = glb_n_cv_folds,
    tune_models_df = NULL
)
## [1] "fitting model: Max.cor.Y.glm"
## [1] " indep_vars: T.vinc"
## + Fold1: parameter=none
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold1: parameter=none
## + Fold2: parameter=none
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold2: parameter=none
## + Fold3: parameter=none
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold3: parameter=none
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.113 -1.113 0.000 0.000 1.244
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.15445 0.04403 -3.508 0.000451 ***
## T.vinc -18.33794 224.81205 -0.082 0.934989
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4409.5 on 4009 degrees of freedom
## Residual deviance: 2865.6 on 4008 degrees of freedom
## AIC: 2869.6
##
## Number of Fisher Scoring iterations: 20
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.6315096
## 3 0.2 0.6315096
## 4 0.3 0.6315096
## 5 0.4 0.6315096
## 6 0.5 0.0000000
## 7 0.6 0.0000000
## 8 0.7 0.0000000
## 9 0.8 0.0000000
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1934
## 2 Y NA
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 1118
## 2 958
## Prediction
## Reference N Y
## N 1934 1118
## Y 0 958
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.211970e-01 4.525179e-01 7.070358e-01 7.350353e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.099523e-244
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.6435877
## 3 0.2 0.6435877
## 4 0.3 0.6435877
## 5 0.4 0.6435877
## 6 0.5 0.0000000
## 7 0.6 0.0000000
## 8 0.7 0.0000000
## 9 0.8 0.0000000
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Max.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Max.cor.Y.glm.Y
## 1 452
## 2 409
## Prediction
## Reference N Y
## N 856 452
## Y 1 409
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.363213e-01 4.732836e-01 7.148024e-01 7.570300e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 9.926261e-01 3.211291e-99
## model_id model_method feats max.nTuningRuns
## 1 Max.cor.Y.glm glm T.vinc 1
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 1.146 0.111 0.8168414
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.6315096 0.7610974
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7070358 0.7350353 0 0.8264955
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.4 0.6435877 0.7363213
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.7148024 0.75703 0.4732836 2869.603
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.0002254406 0
# Interactions.High.cor.Y
# int_feats holds the distinct non-NA entries of glb_feats_df$cor.high.X; the
# model is fitted only when at least one such entry exists.
int_feats <- setdiff(unique(glb_feats_df$cor.high.X), NA)
if (length(int_feats) > 0) {
    # lm & glm handle interaction terms; rpart & rf do not.
    # For lm/glm: the max_cor_y_x_var main effect plus one
    # "<max_cor_y_x_var>:<feature>" interaction term per entry of int_feats.
    # Otherwise: the plain union of the feature names.
    indep_vars_vctr <- if (glb_is_regression || glb_is_binomial) {
        c(max_cor_y_x_var, paste(max_cor_y_x_var, int_feats, sep = ":"))
    } else {
        union(max_cor_y_x_var, int_feats)
    }

    # Every argument is passed by name (matching the other myfit_mdl calls in
    # this script) so the call does not depend on the order of myfit_mdl's
    # formals. The method is picked with scalar if/else, not vectorised ifelse().
    ret_lst <- myfit_mdl(
        model_id = "Interact.High.cor.Y",
        model_method = if (glb_is_regression) {
            "lm"
        } else if (glb_is_binomial) {
            "glm"
        } else {
            "rpart"
        },
        model_type = glb_model_type,
        indep_vars_vctr = indep_vars_vctr,
        rsp_var = glb_rsp_var,
        rsp_var_out = glb_rsp_var_out,
        fit_df = glb_trnent_df,
        OOB_df = glb_newent_df,
        n_cv_folds = glb_n_cv_folds,
        tune_models_df = NULL
    )
}
## [1] "fitting model: Interact.High.cor.Y.glm"
## [1] " indep_vars: T.vinc, T.vinc:T.report, T.vinc:T.www, T.vinc:T.num.words.unq, T.vinc:T.num.words, T.vinc:T.hou, T.vinc:T.know"
## + Fold1: parameter=none
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold1: parameter=none
## + Fold2: parameter=none
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold2: parameter=none
## + Fold3: parameter=none
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold3: parameter=none
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.113 -1.113 0.000 0.000 1.244
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.544e-01 4.403e-02 -3.508 0.000451 ***
## T.vinc -1.939e+01 4.849e+02 -0.040 0.968101
## `T.vinc:T.report` -2.423e-01 4.341e+02 -0.001 0.999555
## `T.vinc:T.www` 3.089e-01 1.580e+02 0.002 0.998440
## `T.vinc:T.num.words.unq` 4.942e-03 1.001e+01 0.000 0.999606
## `T.vinc:T.num.words` -2.931e-04 4.184e+00 0.000 0.999944
## `T.vinc:T.hou` 1.198e-01 1.629e+01 0.007 0.994131
## `T.vinc:T.know` 4.625e-01 1.194e+02 0.004 0.996909
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4409.5 on 4009 degrees of freedom
## Residual deviance: 2865.6 on 4002 degrees of freedom
## AIC: 2881.6
##
## Number of Fisher Scoring iterations: 20
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.6315096
## 3 0.2 0.6315096
## 4 0.3 0.6315096
## 5 0.4 0.6315096
## 6 0.5 0.0000000
## 7 0.6 0.0000000
## 8 0.7 0.0000000
## 9 0.8 0.0000000
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1934
## 2 Y NA
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 1118
## 2 958
## Reference
## Prediction N Y
## N 1934 0
## Y 1118 958
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1934
## 2 Y 0
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 1118
## 2 958
## Prediction
## Reference N Y
## N 1934 1118
## Y 0 958
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.211970e-01 4.525179e-01 7.070358e-01 7.350353e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.099523e-244
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.6435877
## 3 0.2 0.6435877
## 4 0.3 0.6435877
## 5 0.4 0.6435877
## 6 0.5 0.0000000
## 7 0.6 0.0000000
## 8 0.7 0.0000000
## 9 0.8 0.0000000
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 452
## 2 409
## Reference
## Prediction N Y
## N 856 1
## Y 452 409
## spam.fctr spam.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 856
## 2 Y 1
## spam.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 452
## 2 409
## Prediction
## Reference N Y
## N 856 452
## Y 1 409
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.363213e-01 4.732836e-01 7.148024e-01 7.570300e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 9.926261e-01 3.211291e-99
## model_id model_method
## 1 Interact.High.cor.Y.glm glm
## feats
## 1 T.vinc, T.vinc:T.report, T.vinc:T.www, T.vinc:T.num.words.unq, T.vinc:T.num.words, T.vinc:T.hou, T.vinc:T.know
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 1.381 0.181
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.8168414 0.4 0.6315096 0.7610974
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7070358 0.7350353 0 0.8264284
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.4 0.6435877 0.7363213
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.7148024 0.75703 0.4732836 2881.603
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.0002254406 0
# Low.cor.X
# Select the ids of features whose cor.high.X entry is NA and whose
# exclude.as.feat flag is not 1. For binomial classification the rows must
# additionally satisfy is.ConditionalX.y
# (NOTE(review): the meaning of that flag is defined elsewhere - confirm).
indep_vars_vctr <- if (glb_is_classification && glb_is_binomial) {
    subset(glb_feats_df,
           is.na(cor.high.X) & is.ConditionalX.y & (exclude.as.feat != 1))[, "id"]
} else {
    subset(glb_feats_df,
           is.na(cor.high.X) & (exclude.as.feat != 1))[, "id"]
}
# Fit the Low.cor.X model on the features currently held in indep_vars_vctr,
# using "lm" for regression, "glm" for binomial classification and "rpart"
# otherwise. Every argument is passed by name (matching the other myfit_mdl
# calls in this script) so the call does not depend on the order of
# myfit_mdl's formals; the method is picked with scalar if/else rather than
# the vectorised ifelse().
ret_lst <- myfit_mdl(
    model_id = "Low.cor.X",
    model_method = if (glb_is_regression) {
        "lm"
    } else if (glb_is_binomial) {
        "glm"
    } else {
        "rpart"
    },
    model_type = glb_model_type,
    indep_vars_vctr = indep_vars_vctr,
    rsp_var = glb_rsp_var,
    rsp_var_out = glb_rsp_var_out,
    fit_df = glb_trnent_df,
    OOB_df = glb_newent_df,
    n_cv_folds = glb_n_cv_folds,
    tune_models_df = NULL
)
## [1] "fitting model: Low.cor.X.glm"
## [1] " indep_vars: T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.research, T.pleas, T.forward, T.X2000, T.subject, T.thank"
## + Fold1: parameter=none
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold1: parameter=none
## + Fold2: parameter=none
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold2: parameter=none
## + Fold3: parameter=none
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold3: parameter=none
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.49 0.00 0.00 0.00 8.49
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.299e+14 1.242e+07 18504737 <2e-16 ***
## T.click 2.565e+14 2.410e+06 106418490 <2e-16 ***
## T.life 1.036e+15 3.571e+06 290155749 <2e-16 ***
## T.remov 4.132e+14 3.371e+06 122576714 <2e-16 ***
## T.websit 1.151e+13 2.268e+06 5076696 <2e-16 ***
## T.money 2.933e+14 2.345e+06 125067277 <2e-16 ***
## T.now 3.390e+14 2.283e+06 148485507 <2e-16 ***
## T.onlin 3.794e+14 2.585e+06 146764279 <2e-16 ***
## T.receiv 1.063e+13 1.913e+06 5559810 <2e-16 ***
## T.softwar 3.451e+14 2.173e+06 158813876 <2e-16 ***
## T.invest 2.975e+14 2.301e+06 129335161 <2e-16 ***
## T.without 2.711e+13 4.546e+06 5963405 <2e-16 ***
## T.secur 1.778e+13 1.999e+06 8890380 <2e-16 ***
## T.has.http 4.064e+14 3.653e+06 111261436 <2e-16 ***
## T.offer 1.460e+14 1.986e+06 73533087 <2e-16 ***
## T.special 1.701e+14 3.596e+06 47291717 <2e-16 ***
## T.free 7.441e+13 1.917e+06 38809297 <2e-16 ***
## T.within 3.114e+14 3.424e+06 90945927 <2e-16 ***
## T.account 2.316e+14 1.539e+06 150442444 <2e-16 ***
## T.net 1.722e+14 2.511e+06 68564148 <2e-16 ***
## T.just 1.262e+14 2.402e+06 52541420 <2e-16 ***
## T.compani 3.049e+13 1.279e+06 23844086 <2e-16 ***
## T.wish 3.360e+13 4.090e+06 8214376 <2e-16 ***
## T.custom 7.131e+13 2.454e+06 29055234 <2e-16 ***
## T.busi 4.292e+13 1.359e+06 31585538 <2e-16 ***
## T.right 1.469e+14 3.222e+06 45607886 <2e-16 ***
## T.futur 9.792e+13 3.148e+06 31106073 <2e-16 ***
## T.email 4.227e+13 1.152e+06 36706822 <2e-16 ***
## T.site 7.725e+13 1.921e+06 40218962 <2e-16 ***
## T.address -1.669e+13 1.955e+06 -8538926 <2e-16 ***
## T.home 1.106e+14 2.440e+06 45336103 <2e-16 ***
## T.internet -6.746e+13 2.700e+06 -24991175 <2e-16 ***
## T.X000 5.659e+13 1.640e+06 34516678 <2e-16 ***
## T.effect 1.915e+14 4.145e+06 46200402 <2e-16 ***
## T.list 1.006e+14 1.736e+06 57957373 <2e-16 ***
## T.made -2.279e+12 3.841e+06 -593174 <2e-16 ***
## T.success 1.067e+14 3.188e+06 33470334 <2e-16 ***
## T.order -4.939e+13 1.608e+06 -30713952 <2e-16 ***
## T.result 5.568e+13 3.166e+06 17586923 <2e-16 ***
## T.buy 2.368e+14 3.786e+06 62549171 <2e-16 ***
## T.link 1.141e+14 3.389e+06 33667752 <2e-16 ***
## T.design 4.127e+13 3.251e+06 12695150 <2e-16 ***
## T.peopl -4.647e+13 2.684e+06 -17313410 <2e-16 ***
## T.product 4.849e+13 1.943e+06 24952833 <2e-16 ***
## T.repli 2.186e+14 4.070e+06 53704294 <2e-16 ***
## T.line 3.824e+13 3.570e+06 10710977 <2e-16 ***
## T.thing 1.182e+14 3.699e+06 31938201 <2e-16 ***
## T.inform 9.208e+13 1.470e+06 62650396 <2e-16 ***
## T.info -3.837e+13 3.108e+06 -12345202 <2e-16 ***
## T.immedi 2.672e+14 4.821e+06 55421693 <2e-16 ***
## T.make -7.753e+13 1.928e+06 -40211806 <2e-16 ***
## T.way -3.360e+12 2.981e+06 -1126917 <2e-16 ***
## T.report -6.558e+13 1.556e+06 -42152099 <2e-16 ***
## T.start 2.910e+13 2.502e+06 11632781 <2e-16 ***
## T.believ 1.004e+14 4.463e+06 22490306 <2e-16 ***
## T.web -5.801e+13 2.947e+06 -19686440 <2e-16 ***
## T.provid -4.066e+13 2.225e+06 -18275594 <2e-16 ***
## T.mean 8.860e+13 4.619e+06 19181897 <2e-16 ***
## T.read 3.478e+13 4.182e+06 8315355 <2e-16 ***
## T.one 7.705e+13 1.857e+06 41502619 <2e-16 ***
## T.expect -1.024e+14 3.283e+06 -31178078 <2e-16 ***
## T.per 1.003e+14 2.187e+06 45868665 <2e-16 ***
## T.full 1.332e+14 3.656e+06 36438224 <2e-16 ***
## T.return 1.673e+14 3.594e+06 46535661 <2e-16 ***
## T.version -1.684e+14 2.713e+06 -62063034 <2e-16 ***
## T.high 1.016e+14 2.860e+06 35519824 <2e-16 ***
## T.get 3.194e+13 1.534e+06 20824361 <2e-16 ***
## T.don -5.172e+12 3.231e+06 -1600549 <2e-16 ***
## T.www -8.856e+13 1.525e+06 -58068724 <2e-16 ***
## T.best -2.395e+13 2.439e+06 -9818815 <2e-16 ***
## T.today -6.460e+13 2.767e+06 -23341156 <2e-16 ***
## T.name -5.033e+13 1.879e+06 -26782309 <2e-16 ***
## T.send -1.782e+14 2.188e+06 -81463594 <2e-16 ***
## T.even -1.630e+14 3.261e+06 -50000448 <2e-16 ***
## T.mani 7.041e+13 3.261e+06 21591253 <2e-16 ***
## T.involv 3.418e+13 4.340e+06 7876836 <2e-16 ***
## T.form -7.595e+12 2.825e+06 -2688165 <2e-16 ***
## T.servic -1.663e+13 1.636e+06 -10164957 <2e-16 ***
## T.system -3.510e+12 1.823e+06 -1925707 <2e-16 ***
## T.want -2.508e+13 1.908e+06 -13141231 <2e-16 ***
## T.increas 1.773e+13 2.750e+06 6447679 <2e-16 ***
## T.creat 1.709e+14 3.889e+06 43956210 <2e-16 ***
## T.hour 1.585e+14 2.878e+06 55052849 <2e-16 ***
## T.corpor 3.916e+12 2.910e+06 1345721 <2e-16 ***
## T.check -1.117e+12 3.252e+06 -343340 <2e-16 ***
## T.say 1.015e+13 3.780e+06 2686101 <2e-16 ***
## T.engin 1.764e+14 3.093e+06 57022494 <2e-16 ***
## T.hello 4.319e+13 3.172e+06 13617508 <2e-16 ***
## T.keep -2.104e+14 3.838e+06 -54813482 <2e-16 ***
## T.person 5.865e+13 2.835e+06 20688215 <2e-16 ***
## T.avail -3.818e+13 2.415e+06 -15811847 <2e-16 ***
## T.month 3.610e+13 1.940e+06 18605464 <2e-16 ***
## T.real 1.181e+14 2.592e+06 45554776 <2e-16 ***
## T.program 7.207e+13 1.385e+06 52030777 <2e-16 ***
## T.write 1.003e+14 4.260e+06 23536188 <2e-16 ***
## T.place 1.503e+14 4.102e+06 36625611 <2e-16 ***
## T.import -2.463e+14 3.925e+06 -62740368 <2e-16 ***
## T.complet -4.224e+13 2.998e+06 -14088891 <2e-16 ***
## T.state 1.578e+14 2.019e+06 78193462 <2e-16 ***
## T.type -1.036e+14 3.496e+06 -29639344 <2e-16 ***
## T.done 3.240e+12 4.809e+06 673774 <2e-16 ***
## T.rate 1.527e+12 2.003e+06 762372 <2e-16 ***
## T.financi -1.446e+14 2.215e+06 -65301758 <2e-16 ***
## T.includ -5.798e+13 2.464e+06 -23533454 <2e-16 ***
## T.industri -7.277e+13 2.342e+06 -31068605 <2e-16 ***
## T.lot -2.696e+12 4.956e+06 -544035 <2e-16 ***
## T.sever 1.435e+14 4.354e+06 32960398 <2e-16 ***
## T.base -2.604e+13 2.608e+06 -9984127 <2e-16 ***
## T.applic 7.190e+13 2.642e+06 27212437 <2e-16 ***
## T.due 1.722e+14 4.136e+06 41635015 <2e-16 ***
## T.realli 9.539e+13 4.438e+06 21493506 <2e-16 ***
## T.day -1.204e+13 1.662e+06 -7244157 <2e-16 ***
## T.requir 1.144e+14 2.934e+06 39009629 <2e-16 ***
## T.messag 1.623e+14 1.601e+06 101319157 <2e-16 ***
## T.assist 1.438e+13 3.365e+06 4274487 <2e-16 ***
## T.take -2.353e+13 2.244e+06 -10484759 <2e-16 ***
## T.allow 7.879e+13 4.028e+06 19563253 <2e-16 ***
## T.effort -1.372e+14 3.340e+06 -41074382 <2e-16 ***
## T.tri 2.988e+12 3.237e+06 923056 <2e-16 ***
## T.market 2.789e+13 1.044e+06 26705976 <2e-16 ***
## T.use -1.164e+13 1.532e+06 -7597367 <2e-16 ***
## T.oper -9.495e+13 2.413e+06 -39340880 <2e-16 ***
## T.first 8.755e+13 2.450e+06 35728018 <2e-16 ***
## T.cost -1.354e+14 2.044e+06 -66228356 <2e-16 ***
## T.much -1.416e+14 3.104e+06 -45626704 <2e-16 ***
## T.find -1.420e+14 2.899e+06 -48976474 <2e-16 ***
## T.public -1.082e+14 3.350e+06 -32291714 <2e-16 ***
## T.sure 1.003e+14 4.230e+06 23704678 <2e-16 ***
## T.valu 2.458e+13 2.000e+06 12289316 <2e-16 ***
## T.new -5.767e+13 1.361e+06 -42384304 <2e-16 ***
## .rnorm -2.289e+13 1.089e+06 -21024193 <2e-16 ***
## T.access -1.938e+14 2.228e+06 -87007094 <2e-16 ***
## T.term -5.727e+13 2.994e+06 -19128904 <2e-16 ***
## T.short 1.176e+12 3.289e+06 357653 <2e-16 ***
## T.interest 1.385e+14 1.877e+06 73819037 <2e-16 ***
## T.unit -4.340e+13 2.342e+06 -18534028 <2e-16 ***
## T.part -2.728e+14 3.381e+06 -80699608 <2e-16 ***
## T.better 1.891e+14 4.248e+06 44503311 <2e-16 ***
## T.file 6.726e+12 1.991e+06 3377188 <2e-16 ***
## T.see -1.319e+13 2.256e+06 -5847603 <2e-16 ***
## T.recent -1.853e+14 4.334e+06 -42752787 <2e-16 ***
## T.chang -2.923e+14 2.198e+06 -132948167 <2e-16 ***
## T.may -5.789e+13 1.329e+06 -43547253 <2e-16 ***
## T.plan -2.123e+14 2.028e+06 -104649738 <2e-16 ***
## T.event 3.068e+13 2.300e+06 13341111 <2e-16 ***
## T.member -8.333e+13 2.952e+06 -28226400 <2e-16 ***
## T.intern -1.095e+13 2.653e+06 -4127140 <2e-16 ***
## T.number 7.754e+13 2.164e+06 35835506 <2e-16 ***
## T.great 4.082e+13 2.792e+06 14618003 <2e-16 ***
## T.differ -8.500e+13 3.240e+06 -26230159 <2e-16 ***
## T.specif -6.580e+14 4.120e+06 -159718412 <2e-16 ***
## T.long -3.871e+13 3.905e+06 -9913556 <2e-16 ***
## T.alreadi -1.819e+14 4.414e+06 -41207331 <2e-16 ***
## T.good -2.979e+13 2.690e+06 -11074671 <2e-16 ***
## T.visit 9.053e+13 1.864e+06 48556063 <2e-16 ***
## T.credit -9.742e+12 1.570e+06 -6206433 <2e-16 ***
## T.relat -2.138e+14 3.175e+06 -67335537 <2e-16 ***
## T.resourc -3.889e+13 3.015e+06 -12897221 <2e-16 ***
## T.approv -3.058e+13 2.457e+06 -12448906 <2e-16 ***
## T.opportun 2.514e+13 2.409e+06 10438934 <2e-16 ***
## T.look 1.089e+14 1.939e+06 56190080 <2e-16 ***
## T.bring -5.160e+13 4.456e+06 -11581201 <2e-16 ***
## T.area 3.051e+14 3.304e+06 92341487 <2e-16 ***
## T.current 2.699e+13 2.388e+06 11302110 <2e-16 ***
## T.deal -3.758e+12 2.739e+06 -1372235 <2e-16 ***
## T.num.words.unq -4.081e+12 1.005e+05 -40609796 <2e-16 ***
## T.given -2.198e+14 4.615e+06 -47631083 <2e-16 ***
## T.experi -1.337e+14 4.147e+06 -32234119 <2e-16 ***
## T.price 9.477e+12 9.828e+05 9642890 <2e-16 ***
## T.move 1.049e+14 2.947e+06 35578618 <2e-16 ***
## T.put -1.798e+13 4.281e+06 -4201058 <2e-16 ***
## T.posit -3.702e+13 1.969e+06 -18806824 <2e-16 ***
## T.sorri 1.399e+13 4.196e+06 3333399 <2e-16 ***
## T.review -6.505e+13 1.877e+06 -34646965 <2e-16 ***
## T.locat -2.091e+13 3.625e+06 -5768176 <2e-16 ***
## T.anoth -1.760e+14 3.936e+06 -44703704 <2e-16 ***
## T.continu 5.016e+13 3.648e+06 13748419 <2e-16 ***
## T.can 1.000e+13 1.281e+06 7811824 <2e-16 ***
## T.cours 8.724e+11 1.794e+06 486323 <2e-16 ***
## T.sinc -9.081e+13 3.683e+06 -24658292 <2e-16 ***
## T.contract 3.076e+13 2.464e+06 12483541 <2e-16 ***
## T.come 3.086e+13 2.857e+06 10802189 <2e-16 ***
## T.respons -5.620e+13 2.915e+06 -19280965 <2e-16 ***
## T.sincer -3.114e+14 4.394e+06 -70880466 <2e-16 ***
## T.end -1.195e+14 2.098e+06 -56957696 <2e-16 ***
## T.year -1.108e+14 1.450e+06 -76389260 <2e-16 ***
## T.time -2.523e+13 1.334e+06 -18914321 <2e-16 ***
## T.project 9.627e+13 1.496e+06 64372418 <2e-16 ***
## T.com -6.926e+12 5.596e+05 -12376374 <2e-16 ***
## T.addit -1.504e+14 2.974e+06 -50578634 <2e-16 ***
## T.detail 1.326e+14 3.244e+06 40875515 <2e-16 ***
## T.might 3.277e+13 3.350e+06 9781827 <2e-16 ***
## T.process 1.372e+14 2.280e+06 60177515 <2e-16 ***
## T.open 1.286e+14 3.801e+06 33838732 <2e-16 ***
## T.trade -1.317e+14 1.383e+06 -95250222 <2e-16 ***
## T.idea -6.469e+13 4.418e+06 -14641523 <2e-16 ***
## T.feel 4.263e+13 3.826e+06 11142032 <2e-16 ***
## T.case -2.124e+14 2.821e+06 -75294571 <2e-16 ***
## T.respond 2.353e+14 4.361e+06 53959784 <2e-16 ***
## T.gas -1.107e+12 1.451e+06 -762636 <2e-16 ***
## T.updat -1.802e+14 2.587e+06 -69632420 <2e-16 ***
## T.either -1.251e+13 5.264e+06 -2376751 <2e-16 ***
## T.communic -3.061e+13 7.587e+05 -40348915 <2e-16 ***
## T.final 1.350e+14 3.448e+06 39162226 <2e-16 ***
## T.need -4.506e+13 1.652e+06 -27275744 <2e-16 ***
## T.well -1.361e+14 2.510e+06 -54238422 <2e-16 ***
## T.num.chars 3.278e+11 4.471e+03 73307330 <2e-16 ***
## T.support -1.114e+14 2.347e+06 -47475709 <2e-16 ***
## T.join -1.963e+14 3.948e+06 -49727540 <2e-16 ***
## T.give -2.773e+13 2.866e+06 -9675421 <2e-16 ***
## T.power -3.484e+13 1.150e+06 -30297924 <2e-16 ***
## T.juli -1.893e+14 1.990e+06 -95103693 <2e-16 ***
## T.follow 9.473e+12 2.111e+06 4488422 <2e-16 ***
## T.back -1.443e+14 2.731e+06 -52841133 <2e-16 ***
## T.direct -2.050e+14 3.097e+06 -66195904 <2e-16 ***
## T.abl -1.995e+13 3.379e+06 -5904718 <2e-16 ***
## T.phone -3.041e+13 2.359e+06 -12888622 <2e-16 ***
## T.director -7.976e+13 2.951e+06 -27028392 <2e-16 ***
## T.help 3.463e+13 2.072e+06 16712085 <2e-16 ***
## T.two -1.788e+14 2.659e+06 -67248108 <2e-16 ***
## T.point -7.486e+13 3.087e+06 -24251770 <2e-16 ***
## T.note -9.409e+13 3.049e+06 -30858976 <2e-16 ***
## T.thought 2.374e+13 4.540e+06 5229881 <2e-16 ***
## T.book 3.667e+13 2.048e+06 17904967 <2e-16 ***
## T.offic -1.597e+14 2.178e+06 -73323876 <2e-16 ***
## T.hear -8.218e+13 4.084e+06 -20120791 <2e-16 ***
## T.comment -2.954e+14 2.817e+06 -104848170 <2e-16 ***
## T.copi -2.102e+14 2.136e+06 -98422954 <2e-16 ***
## T.associ -7.259e+13 1.823e+06 -39829086 <2e-16 ***
## T.particip -8.076e+13 2.264e+06 -35673053 <2e-16 ***
## T.etc -1.206e+14 3.687e+06 -32717174 <2e-16 ***
## T.still -7.299e+13 3.725e+06 -19595733 <2e-16 ***
## T.confirm -1.529e+14 2.583e+06 -59192951 <2e-16 ***
## T.will -6.651e+13 7.527e+05 -88353628 <2e-16 ***
## T.run -3.311e+13 3.633e+06 -9111549 <2e-16 ***
## T.fax 4.323e+12 1.672e+06 2585533 <2e-16 ***
## T.work -1.727e+14 1.427e+06 -121016119 <2e-16 ***
## T.origin 2.166e+14 2.649e+06 81762032 <2e-16 ***
## T.problem 3.305e+13 3.108e+06 10632018 <2e-16 ***
## T.contact 4.378e+13 2.094e+06 20906063 <2e-16 ***
## T.next. 1.214e+14 2.415e+06 50268075 <2e-16 ***
## T.understand -1.465e+13 3.550e+06 -4127797 <2e-16 ***
## T.howev -1.136e+14 3.549e+06 -32000965 <2e-16 ***
## T.develop 3.118e+12 9.787e+05 3185476 <2e-16 ***
## T.mention -5.643e+13 4.492e+06 -12560973 <2e-16 ***
## T.data -1.373e+14 1.723e+06 -79709430 <2e-16 ***
## T.issu -5.822e+13 2.524e+06 -23065280 <2e-16 ***
## T.sent -1.975e+14 2.804e+06 -70453701 <2e-16 ***
## T.togeth 1.101e+13 4.381e+06 2512516 <2e-16 ***
## T.begin -1.556e+14 5.108e+06 -30466032 <2e-16 ***
## T.team -7.897e+13 2.048e+06 -38563398 <2e-16 ***
## T.deriv -2.463e+13 2.344e+06 -10511103 <2e-16 ***
## T.present -2.826e+13 1.401e+06 -20170154 <2e-16 ***
## T.happi -5.311e+13 4.178e+06 -12712405 <2e-16 ***
## T.date -1.039e+14 2.165e+06 -47995053 <2e-16 ***
## T.also 1.682e+14 2.147e+06 78308156 <2e-16 ***
## T.set -1.988e+14 3.095e+06 -64226117 <2e-16 ***
## T.tuesday -2.634e+14 3.277e+06 -80381308 <2e-16 ***
## T.mark -2.275e+14 1.947e+06 -116855982 <2e-16 ***
## T.soon 9.040e+13 3.994e+06 22631969 <2e-16 ***
## T.last -9.981e+13 2.571e+06 -38821926 <2e-16 ***
## T.request -1.979e+14 1.614e+06 -122598540 <2e-16 ***
## T.april -2.386e+14 2.430e+06 -98222557 <2e-16 ***
## T.risk -5.270e+13 1.097e+06 -48050742 <2e-16 ***
## T.like 1.616e+13 1.745e+06 9263846 <2e-16 ***
## T.num.words.unq.log 6.631e+14 1.271e+07 52181371 <2e-16 ***
## T.invit -7.829e+13 2.539e+06 -30828186 <2e-16 ***
## T.depart -1.282e+14 3.039e+06 -42180154 <2e-16 ***
## T.think -1.620e+14 2.367e+06 -68435593 <2e-16 ***
## T.analysi -7.633e+13 3.101e+06 -24612280 <2e-16 ***
## T.possibl -1.820e+14 3.209e+06 -56731149 <2e-16 ***
## T.week -8.488e+13 1.953e+06 -43449974 <2e-16 ***
## T.school -1.068e+14 2.467e+06 -43273083 <2e-16 ***
## T.london -1.256e+14 1.872e+06 -67083140 <2e-16 ***
## T.robert -2.498e+14 2.926e+06 -85369438 <2e-16 ***
## T.student -9.025e+13 1.992e+06 -45317633 <2e-16 ***
## T.call -6.235e+13 1.594e+06 -39111161 <2e-16 ***
## T.option -1.099e+13 1.299e+06 -8461170 <2e-16 ***
## T.confer -4.614e+13 1.353e+06 -34110566 <2e-16 ***
## T.dear -6.055e+13 3.444e+06 -17583802 <2e-16 ***
## T.edu -4.844e+12 8.361e+05 -5793682 <2e-16 ***
## T.wednesday 9.843e+13 3.493e+06 28177643 <2e-16 ***
## T.arrang 2.238e+13 3.841e+06 5828093 <2e-16 ***
## T.manag -2.046e+12 1.359e+06 -1506151 <2e-16 ***
## T.ask 3.793e+13 2.458e+06 15429537 <2e-16 ***
## T.energi -7.921e+13 1.034e+06 -76593947 <2e-16 ***
## T.financ -3.035e+13 2.025e+06 -14986404 <2e-16 ***
## T.doc -3.338e+14 2.339e+06 -142732554 <2e-16 ***
## T.suggest -2.872e+14 3.184e+06 -90194195 <2e-16 ***
## T.friday 2.332e+13 2.433e+06 9585100 <2e-16 ***
## T.resum -5.252e+13 1.806e+06 -29071565 <2e-16 ***
## T.thursday -4.586e+13 3.019e+06 -15189635 <2e-16 ***
## T.john 5.857e+13 1.632e+06 35889129 <2e-16 ***
## T.attend 3.022e+11 3.156e+06 95780 <2e-16 ***
## T.corp 1.138e+13 1.243e+06 9152384 <2e-16 ***
## T.morn -3.639e+14 3.558e+06 -102288857 <2e-16 ***
## T.monday -1.401e+14 3.018e+06 -46407174 <2e-16 ***
## T.univers -2.229e+13 2.375e+06 -9384888 <2e-16 ***
## T.appreci -1.381e+14 3.833e+06 -36022540 <2e-16 ***
## T.shall -2.084e+14 2.681e+06 -77731847 <2e-16 ***
## T.interview -2.328e+14 1.389e+06 -167573255 <2e-16 ***
## T.model -1.469e+14 1.067e+06 -137598861 <2e-16 ***
## T.houston -1.976e+14 1.241e+06 -159248530 <2e-16 ***
## T.question -3.884e+14 2.426e+06 -160127555 <2e-16 ***
## T.meet 5.134e+13 1.117e+06 45951902 <2e-16 ***
## T.talk -5.441e+13 2.452e+06 -22190618 <2e-16 ***
## T.num.words.log -6.891e+14 1.051e+07 -65584700 <2e-16 ***
## T.schedul -2.277e+14 2.086e+06 -109130880 <2e-16 ***
## T.group 1.629e+13 1.236e+06 13186599 <2e-16 ***
## T.discuss -1.679e+14 2.122e+06 -79108022 <2e-16 ***
## T.hou 2.286e+13 5.044e+05 45314406 <2e-16 ***
## T.know -7.062e+13 1.754e+06 -40262033 <2e-16 ***
## T.hope -1.790e+14 2.972e+06 -60220163 <2e-16 ***
## T.attach -1.030e+14 2.175e+06 -47364280 <2e-16 ***
## T.X2001 -2.504e+14 1.127e+06 -222262155 <2e-16 ***
## T.regard -1.302e+14 2.117e+06 -61495428 <2e-16 ***
## T.research -2.034e+14 1.141e+06 -178305695 <2e-16 ***
## T.pleas -1.343e+13 1.375e+06 -9770731 <2e-16 ***
## T.forward 6.832e+12 1.558e+06 4386109 <2e-16 ***
## T.X2000 -2.127e+14 9.161e+05 -232219731 <2e-16 ***
## T.subject 4.637e+13 1.925e+06 24095555 <2e-16 ***
## T.thank -3.528e+14 1.567e+06 -225136632 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4409.5 on 4009 degrees of freedom
## Residual deviance: 9299.3 on 3688 degrees of freedom
## AIC: 9943.3
##
## Number of Fisher Scoring iterations: 25
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.9351433
## 3 0.2 0.9351433
## 4 0.3 0.9351433
## 5 0.4 0.9351433
## 6 0.5 0.9351433
## 7 0.6 0.9351433
## 8 0.7 0.9351433
## 9 0.8 0.9351433
## 10 0.9 0.9351433
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Reference
## Prediction N Y
## N 2951 28
## Y 101 930
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 2951
## 2 Y 28
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 101
## 2 930
## Prediction
## Reference N Y
## N 2951 101
## Y 28 930
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.678304e-01 9.137921e-01 9.618923e-01 9.730733e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 8.191092e-295 2.308915e-10
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.8712644
## 3 0.2 0.8712644
## 4 0.3 0.8712644
## 5 0.4 0.8712644
## 6 0.5 0.8712644
## 7 0.6 0.8712644
## 8 0.7 0.8712644
## 9 0.8 0.8712644
## 10 0.9 0.8712644
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Reference
## Prediction N Y
## N 1227 31
## Y 81 379
## spam.fctr spam.fctr.predict.Low.cor.X.glm.N
## 1 N 1227
## 2 Y 31
## spam.fctr.predict.Low.cor.X.glm.Y
## 1 81
## 2 379
## Prediction
## Reference N Y
## N 1227 81
## Y 31 379
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.348079e-01 8.278095e-01 9.220828e-01 9.460215e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 4.608538e-82 3.655513e-06
## model_id model_method
## 1 Low.cor.X.glm glm
## feats
## 1 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, T.two, T.point, 
T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.research, T.pleas, T.forward, T.X2000, T.subject, T.thank
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 46.686 16.432
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9688397 0.9 0.9351433 0.9149608
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.9618923 0.9730733 0.7621305 0.9312318
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.9 0.8712644 0.9348079
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.9220828 0.9460215 0.8278095 9943.263
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.006085588 0.02245497
# Log one more "fit.models" step in the script-progress table: the major step
# is copied from the most recent row, the minor step is that row's value plus
# one, and the entry is stamped with the elapsed time relative to glb_script_tm.
glb_script_df <- rbind(
    glb_script_df,
    data.frame(
        chunk_label = "fit.models",
        chunk_step_major = tail(glb_script_df[, "chunk_step_major"], 1),
        chunk_step_minor = tail(glb_script_df[, "chunk_step_minor"], 1) + 1,
        # Keep this as the *named* "elapsed" element: the printed row names
        # (elapsed8, elapsed9, ...) are derived from that name.
        elapsed = (proc.time() - glb_script_tm)["elapsed"]
    )
)
# Show the two most recent progress entries
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed8 fit.models 5 0 96.019
## elapsed9 fit.models 5 1 173.248
# Pick the model-id prefix and the candidate feature ids for the next round of
# fits. Features the user excluded (exclude.as.feat == 1) are always dropped;
# for binomial classification the set is additionally restricted to features
# flagged is.ConditionalX.y in glb_feats_df.
# Earlier approach, kept for reference:
#   indep_vars_vctr <- setdiff(names(glb_trnent_df),
#                              union(glb_rsp_var, glb_exclude_vars_as_features))
model_id_pfx <-
    if (glb_is_classification && glb_is_binomial) "Conditional.X" else "All.X"

indep_vars_vctr <-
    if (model_id_pfx == "Conditional.X") {
        subset(glb_feats_df,
               is.ConditionalX.y & (exclude.as.feat != 1))[, "id"]
    } else {
        subset(glb_feats_df, (exclude.as.feat != 1))[, "id"]
    }
# Fit one model per entry of glb_models_method_vctr on the selected features,
# training on glb_trnent_df and evaluating on glb_newent_df. Two methods get an
# extra comparison fit. ret_lst always holds the result of the latest
# myfit_mdl() call.
for (method in glb_models_method_vctr) {
    # Baseline fit with the shared cross-validation and tuning settings
    ret_lst <- myfit_mdl(
        model_id = model_id_pfx,
        model_method = method,
        indep_vars_vctr = indep_vars_vctr,
        model_type = glb_model_type,
        rsp_var = glb_rsp_var,
        rsp_var_out = glb_rsp_var_out,
        fit_df = glb_trnent_df,
        OOB_df = glb_newent_df,
        n_cv_folds = glb_n_cv_folds,
        tune_models_df = glb_tune_models_df
    )

    if (method == "rpart") {
        # Since caret does not optimize rpart well: refit with cp pinned to 0
        # and cross-validation switched off
        ret_lst <- myfit_mdl(
            model_id = paste0(model_id_pfx, ".cp.0"),
            model_method = method,
            indep_vars_vctr = indep_vars_vctr,
            model_type = glb_model_type,
            rsp_var = glb_rsp_var,
            rsp_var_out = glb_rsp_var_out,
            fit_df = glb_trnent_df,
            OOB_df = glb_newent_df,
            n_cv_folds = 0,
            tune_models_df = data.frame(parameter = "cp",
                                        min = 0.0, max = 0.0, by = 0.1)
        )
    } else if (method == "rf") {
        # Compare how rf performs with and without the .rnorm feature
        ret_lst <- myfit_mdl(
            model_id = paste0(model_id_pfx, ".no.rnorm"),
            model_method = method,
            indep_vars_vctr = setdiff(indep_vars_vctr, ".rnorm"),
            model_type = glb_model_type,
            rsp_var = glb_rsp_var,
            rsp_var_out = glb_rsp_var_out,
            fit_df = glb_trnent_df,
            OOB_df = glb_newent_df,
            n_cv_folds = glb_n_cv_folds,
            tune_models_df = glb_tune_models_df
        )
    }
}
## [1] "fitting model: Conditional.X.glm"
## [1] " indep_vars: T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, 
T.phone, T.director, T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank"
## + Fold1: parameter=none
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold1: parameter=none
## + Fold2: parameter=none
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold2: parameter=none
## + Fold3: parameter=none
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## - Fold3: parameter=none
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.49 0.00 0.00 0.00 8.49
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.082e+15 3.173e+07 -65594167 <2e-16 ***
## T.click 3.342e+14 2.545e+06 131286744 <2e-16 ***
## T.life 1.157e+15 3.579e+06 323268746 <2e-16 ***
## T.remov 3.517e+14 3.409e+06 103186770 <2e-16 ***
## T.websit 2.743e+13 2.295e+06 11954062 <2e-16 ***
## T.money 4.489e+14 2.361e+06 190115702 <2e-16 ***
## T.now 3.094e+14 2.286e+06 135372985 <2e-16 ***
## T.onlin 4.088e+14 2.591e+06 157796527 <2e-16 ***
## T.receiv -8.477e+13 1.927e+06 -43998305 <2e-16 ***
## T.softwar 3.315e+14 2.179e+06 152190256 <2e-16 ***
## T.invest 2.634e+14 2.307e+06 114156843 <2e-16 ***
## T.without 1.570e+14 4.562e+06 34419068 <2e-16 ***
## T.secur 4.603e+13 2.008e+06 22920765 <2e-16 ***
## T.has.http 6.191e+14 4.263e+06 145227216 <2e-16 ***
## T.offer 5.424e+13 1.990e+06 27255743 <2e-16 ***
## T.special 1.480e+14 3.608e+06 41010941 <2e-16 ***
## T.free 5.939e+13 1.923e+06 30877646 <2e-16 ***
## T.within 2.941e+14 3.432e+06 85692544 <2e-16 ***
## T.account 2.162e+14 1.542e+06 140225253 <2e-16 ***
## T.net 2.466e+14 2.526e+06 97657846 <2e-16 ***
## T.just 1.294e+14 2.408e+06 53737791 <2e-16 ***
## T.compani 4.464e+13 1.282e+06 34806204 <2e-16 ***
## T.wish 2.164e+14 4.105e+06 52720749 <2e-16 ***
## T.custom 9.323e+13 2.471e+06 37728289 <2e-16 ***
## T.busi -1.028e+14 1.365e+06 -75300680 <2e-16 ***
## T.mail 2.135e+14 1.628e+06 131157376 <2e-16 ***
## T.right 1.848e+13 3.248e+06 5690663 <2e-16 ***
## T.futur -2.992e+13 3.155e+06 -9482033 <2e-16 ***
## T.email 4.574e+13 1.161e+06 39394640 <2e-16 ***
## T.site 1.950e+13 1.924e+06 10134846 <2e-16 ***
## T.address -3.477e+13 2.055e+06 -16918652 <2e-16 ***
## T.home 1.224e+14 2.451e+06 49921193 <2e-16 ***
## T.internet -1.222e+14 2.709e+06 -45099162 <2e-16 ***
## T.X000 3.698e+13 1.648e+06 22435526 <2e-16 ***
## T.effect 3.522e+14 4.153e+06 84810055 <2e-16 ***
## T.list 7.229e+13 1.762e+06 41028059 <2e-16 ***
## T.made 5.578e+12 3.849e+06 1449116 <2e-16 ***
## T.success 5.636e+13 3.202e+06 17603041 <2e-16 ***
## T.order -1.039e+14 1.612e+06 -64411523 <2e-16 ***
## T.result 1.450e+14 3.175e+06 45648705 <2e-16 ***
## T.buy 1.324e+14 3.796e+06 34868655 <2e-16 ***
## T.link 3.142e+14 3.390e+06 92699817 <2e-16 ***
## T.http -1.219e+14 2.654e+06 -45943386 <2e-16 ***
## T.design 7.245e+13 3.257e+06 22245618 <2e-16 ***
## T.peopl -2.362e+14 2.690e+06 -87802168 <2e-16 ***
## T.product -4.576e+12 1.948e+06 -2348880 <2e-16 ***
## T.repli 4.292e+14 4.091e+06 104930764 <2e-16 ***
## T.line 9.873e+13 3.576e+06 27604678 <2e-16 ***
## T.thing 7.419e+13 3.722e+06 19932334 <2e-16 ***
## T.inform 9.648e+13 1.473e+06 65489071 <2e-16 ***
## T.info 1.729e+13 3.113e+06 5555001 <2e-16 ***
## T.immedi 4.763e+14 4.845e+06 98314574 <2e-16 ***
## T.make -6.152e+13 1.930e+06 -31874133 <2e-16 ***
## T.way 6.099e+13 2.987e+06 20419931 <2e-16 ***
## T.report -1.447e+14 1.590e+06 -90985728 <2e-16 ***
## T.start 3.451e+13 2.506e+06 13770558 <2e-16 ***
## T.believ 1.539e+14 4.470e+06 34441645 <2e-16 ***
## T.web -4.086e+13 2.967e+06 -13768351 <2e-16 ***
## T.provid -2.011e+13 2.229e+06 -9020685 <2e-16 ***
## T.mean 2.938e+13 4.628e+06 6349344 <2e-16 ***
## T.read -3.311e+13 4.188e+06 -7905713 <2e-16 ***
## T.one 1.321e+14 1.863e+06 70915534 <2e-16 ***
## T.expect -2.651e+14 3.291e+06 -80547454 <2e-16 ***
## T.per 1.595e+14 2.197e+06 72599073 <2e-16 ***
## T.full 2.327e+14 3.660e+06 63564887 <2e-16 ***
## T.return 1.089e+14 3.607e+06 30182383 <2e-16 ***
## T.version -1.989e+14 2.723e+06 -73053530 <2e-16 ***
## T.high 1.958e+14 2.879e+06 68015207 <2e-16 ***
## T.get 5.136e+13 1.537e+06 33406367 <2e-16 ***
## T.don -6.497e+13 3.261e+06 -19925028 <2e-16 ***
## T.www -6.838e+13 2.562e+06 -26684273 <2e-16 ***
## T.best -8.928e+13 2.446e+06 -36496235 <2e-16 ***
## T.today -7.772e+13 2.770e+06 -28053755 <2e-16 ***
## T.name 2.469e+13 1.888e+06 13078760 <2e-16 ***
## T.send -3.097e+14 2.224e+06 -139277393 <2e-16 ***
## T.even -2.387e+14 3.268e+06 -73018721 <2e-16 ***
## T.mani 3.075e+14 3.275e+06 93910901 <2e-16 ***
## T.involv 1.825e+14 4.346e+06 41992294 <2e-16 ***
## T.form -9.294e+12 2.830e+06 -3283491 <2e-16 ***
## T.servic -3.312e+13 1.653e+06 -20038817 <2e-16 ***
## T.system 5.035e+13 1.826e+06 27573573 <2e-16 ***
## T.want -5.869e+13 1.913e+06 -30684738 <2e-16 ***
## T.increas -2.938e+14 2.754e+06 -106684463 <2e-16 ***
## T.creat 3.590e+13 3.907e+06 9189512 <2e-16 ***
## T.hour 1.028e+14 2.886e+06 35603119 <2e-16 ***
## T.corpor 6.808e+13 2.914e+06 23364985 <2e-16 ***
## T.check 8.571e+13 3.260e+06 26291417 <2e-16 ***
## T.say -7.488e+13 3.785e+06 -19782520 <2e-16 ***
## T.engin 2.589e+14 3.101e+06 83509666 <2e-16 ***
## T.hello 1.898e+14 3.184e+06 59616579 <2e-16 ***
## T.keep -2.082e+14 3.840e+06 -54226654 <2e-16 ***
## T.person -7.121e+13 2.844e+06 -25040111 <2e-16 ***
## T.avail -9.291e+13 2.425e+06 -38321101 <2e-16 ***
## T.month 2.389e+11 1.951e+06 122420 <2e-16 ***
## T.real 5.582e+12 2.595e+06 2150843 <2e-16 ***
## T.program -9.460e+13 1.398e+06 -67678653 <2e-16 ***
## T.write -3.868e+13 4.261e+06 -9076250 <2e-16 ***
## T.place 1.598e+14 4.113e+06 38845659 <2e-16 ***
## T.import 1.542e+12 3.935e+06 391734 <2e-16 ***
## T.complet -8.461e+13 3.006e+06 -28143661 <2e-16 ***
## T.state 9.094e+12 2.055e+06 4425912 <2e-16 ***
## T.type -2.525e+14 3.509e+06 -71958765 <2e-16 ***
## T.done 3.702e+13 4.814e+06 7688874 <2e-16 ***
## T.rate 1.744e+14 2.008e+06 86858141 <2e-16 ***
## T.financi -1.034e+14 2.227e+06 -46424649 <2e-16 ***
## T.includ -1.474e+14 2.479e+06 -59476052 <2e-16 ***
## T.industri -2.266e+14 2.346e+06 -96591579 <2e-16 ***
## T.lot 2.643e+13 4.967e+06 5322160 <2e-16 ***
## T.sever 8.745e+13 4.366e+06 20030820 <2e-16 ***
## T.base 1.502e+13 2.633e+06 5703278 <2e-16 ***
## T.applic 1.198e+13 2.651e+06 4516673 <2e-16 ***
## T.due 2.615e+14 4.146e+06 63067324 <2e-16 ***
## T.realli 2.635e+14 4.441e+06 59322977 <2e-16 ***
## T.day -8.865e+13 1.668e+06 -53145950 <2e-16 ***
## T.requir -2.034e+13 2.950e+06 -6895216 <2e-16 ***
## T.messag 7.562e+13 1.628e+06 46446480 <2e-16 ***
## T.assist 5.933e+13 3.367e+06 17623440 <2e-16 ***
## T.take 1.101e+13 2.245e+06 4904156 <2e-16 ***
## T.allow 1.543e+14 4.039e+06 38205016 <2e-16 ***
## T.effort 1.039e+14 3.353e+06 30991800 <2e-16 ***
## T.tri -3.554e+13 3.240e+06 -10969220 <2e-16 ***
## T.market 3.222e+13 1.049e+06 30697272 <2e-16 ***
## T.use -6.507e+13 1.545e+06 -42119403 <2e-16 ***
## T.oper -6.498e+13 2.421e+06 -26837606 <2e-16 ***
## T.first 1.373e+14 2.462e+06 55787976 <2e-16 ***
## T.cost -1.665e+14 2.061e+06 -80793483 <2e-16 ***
## T.much -2.514e+14 3.119e+06 -80605928 <2e-16 ***
## T.find -1.374e+14 2.903e+06 -47327714 <2e-16 ***
## T.public -2.271e+14 3.357e+06 -67650941 <2e-16 ***
## T.sure 3.159e+14 4.240e+06 74503281 <2e-16 ***
## T.valu -4.666e+13 2.002e+06 -23308407 <2e-16 ***
## T.new -6.608e+13 1.364e+06 -48448136 <2e-16 ***
## .rnorm -1.004e+13 1.090e+06 -9211493 <2e-16 ***
## T.access -1.693e+14 2.238e+06 -75646771 <2e-16 ***
## T.term 1.455e+14 2.997e+06 48545230 <2e-16 ***
## T.short 5.047e+13 3.298e+06 15305098 <2e-16 ***
## T.interest 2.385e+14 1.886e+06 126501035 <2e-16 ***
## T.unit -3.321e+13 2.342e+06 -14179580 <2e-16 ***
## T.part -1.302e+14 3.386e+06 -38467908 <2e-16 ***
## T.better 2.454e+14 4.253e+06 57702310 <2e-16 ***
## T.file 4.191e+13 2.008e+06 20864966 <2e-16 ***
## T.see 4.406e+13 2.261e+06 19485965 <2e-16 ***
## T.recent 2.331e+13 4.337e+06 5374701 <2e-16 ***
## T.chang -2.504e+14 2.201e+06 -113752024 <2e-16 ***
## T.may -2.072e+13 1.331e+06 -15567623 <2e-16 ***
## T.plan -1.735e+14 2.037e+06 -85180175 <2e-16 ***
## T.event 9.134e+13 2.303e+06 39652756 <2e-16 ***
## T.member -4.875e+13 2.964e+06 -16446382 <2e-16 ***
## T.intern -9.642e+11 2.654e+06 -363327 <2e-16 ***
## T.number -3.719e+13 2.169e+06 -17147842 <2e-16 ***
## T.great 5.172e+13 2.798e+06 18480414 <2e-16 ***
## T.differ -9.396e+13 3.256e+06 -28859080 <2e-16 ***
## T.specif -3.886e+14 4.127e+06 -94155619 <2e-16 ***
## T.long -1.111e+14 3.910e+06 -28412665 <2e-16 ***
## T.alreadi -1.538e+14 4.417e+06 -34829646 <2e-16 ***
## T.good -3.341e+12 2.698e+06 -1238460 <2e-16 ***
## T.visit 1.055e+14 1.876e+06 56220792 <2e-16 ***
## T.credit 1.083e+14 1.576e+06 68704490 <2e-16 ***
## T.relat -1.631e+14 3.179e+06 -51304810 <2e-16 ***
## T.resourc -1.562e+14 3.019e+06 -51731232 <2e-16 ***
## T.approv -1.606e+14 2.460e+06 -65281402 <2e-16 ***
## T.opportun 1.358e+14 2.413e+06 56291549 <2e-16 ***
## T.look 6.370e+13 1.940e+06 32840753 <2e-16 ***
## T.bring -7.446e+13 4.461e+06 -16692012 <2e-16 ***
## T.area 3.585e+14 3.310e+06 108314762 <2e-16 ***
## T.current 1.111e+13 2.395e+06 4639606 <2e-16 ***
## T.deal -8.126e+13 2.741e+06 -29650423 <2e-16 ***
## T.num.words.unq -2.351e+13 1.198e+05 -196335875 <2e-16 ***
## T.given -4.275e+14 4.640e+06 -92137071 <2e-16 ***
## T.experi -1.173e+14 4.155e+06 -28231153 <2e-16 ***
## T.price 7.838e+13 9.899e+05 79178935 <2e-16 ***
## T.move 4.476e+13 2.954e+06 15152124 <2e-16 ***
## T.put -8.214e+13 4.287e+06 -19163252 <2e-16 ***
## T.posit -1.007e+14 1.971e+06 -51080128 <2e-16 ***
## T.sorri 1.103e+14 4.204e+06 26232341 <2e-16 ***
## T.review -4.274e+13 1.885e+06 -22675238 <2e-16 ***
## T.locat 9.659e+13 3.627e+06 26628098 <2e-16 ***
## T.anoth -2.499e+14 3.939e+06 -63433310 <2e-16 ***
## T.continu -7.468e+13 3.654e+06 -20438334 <2e-16 ***
## T.can 3.675e+13 1.285e+06 28589678 <2e-16 ***
## T.cours 6.649e+13 1.796e+06 37024080 <2e-16 ***
## T.sinc -9.914e+13 3.692e+06 -26855788 <2e-16 ***
## T.contract 7.673e+13 2.466e+06 31111669 <2e-16 ***
## T.come 8.282e+13 2.873e+06 28828466 <2e-16 ***
## T.respons -1.861e+13 2.918e+06 -6377759 <2e-16 ***
## T.sincer -1.136e+14 4.395e+06 -25848072 <2e-16 ***
## T.end -6.340e+13 2.118e+06 -29939111 <2e-16 ***
## T.year -2.137e+14 1.455e+06 -146806265 <2e-16 ***
## T.time -2.844e+13 1.337e+06 -21263870 <2e-16 ***
## T.project -3.896e+13 1.499e+06 -25990080 <2e-16 ***
## T.com -3.439e+13 5.704e+05 -60282855 <2e-16 ***
## T.addit -1.875e+14 2.977e+06 -62970241 <2e-16 ***
## T.detail 1.808e+14 3.245e+06 55701373 <2e-16 ***
## T.might -1.100e+14 3.354e+06 -32791628 <2e-16 ***
## T.process 7.477e+13 2.288e+06 32679694 <2e-16 ***
## T.open 5.635e+13 3.803e+06 14814858 <2e-16 ***
## T.trade -1.638e+14 1.386e+06 -118186802 <2e-16 ***
## T.idea -2.349e+14 4.421e+06 -53122503 <2e-16 ***
## T.feel -2.156e+13 3.838e+06 -5619094 <2e-16 ***
## T.case -1.585e+14 2.825e+06 -56111421 <2e-16 ***
## T.respond 2.271e+14 4.389e+06 51744169 <2e-16 ***
## T.gas -4.563e+13 1.462e+06 -31211139 <2e-16 ***
## T.updat -8.725e+13 2.616e+06 -33350912 <2e-16 ***
## T.either -9.223e+13 5.270e+06 -17500759 <2e-16 ***
## T.communic -4.006e+13 7.633e+05 -52485462 <2e-16 ***
## T.final 9.796e+13 3.451e+06 28386796 <2e-16 ***
## T.need 2.288e+13 1.654e+06 13828619 <2e-16 ***
## T.well -1.567e+14 2.513e+06 -62372105 <2e-16 ***
## T.num.chars 1.719e+11 7.208e+03 23853994 <2e-16 ***
## T.num.words 1.531e+13 1.022e+05 149730010 <2e-16 ***
## T.support -7.342e+13 2.351e+06 -31228166 <2e-16 ***
## T.join -3.621e+14 3.949e+06 -91689104 <2e-16 ***
## T.give -6.162e+13 2.875e+06 -21435005 <2e-16 ***
## T.power -8.640e+13 1.161e+06 -74410657 <2e-16 ***
## T.juli -9.721e+13 1.993e+06 -48768985 <2e-16 ***
## T.follow -6.112e+13 2.117e+06 -28868136 <2e-16 ***
## T.back -1.404e+14 2.732e+06 -51399515 <2e-16 ***
## T.direct -4.248e+14 3.108e+06 -136683120 <2e-16 ***
## T.abl 2.611e+14 3.384e+06 77142268 <2e-16 ***
## T.phone -3.333e+13 2.374e+06 -14037792 <2e-16 ***
## T.director -5.881e+13 2.975e+06 -19770683 <2e-16 ***
## T.help -1.087e+13 2.077e+06 -5234275 <2e-16 ***
## T.two -2.119e+14 2.667e+06 -79426151 <2e-16 ***
## T.point -2.205e+14 3.097e+06 -71209996 <2e-16 ***
## T.note 9.948e+13 3.056e+06 32553645 <2e-16 ***
## T.thought -7.782e+13 4.557e+06 -17076084 <2e-16 ***
## T.book -1.626e+14 2.052e+06 -79217755 <2e-16 ***
## T.offic -1.697e+14 2.180e+06 -77836760 <2e-16 ***
## T.hear 7.578e+13 4.102e+06 18473303 <2e-16 ***
## T.comment -2.320e+14 2.826e+06 -82088696 <2e-16 ***
## T.copi -1.983e+14 2.138e+06 -92741802 <2e-16 ***
## T.associ -7.121e+12 1.824e+06 -3904835 <2e-16 ***
## T.particip 2.316e+12 2.272e+06 1019262 <2e-16 ***
## T.etc -5.106e+13 3.697e+06 -13810689 <2e-16 ***
## T.still -6.926e+13 3.729e+06 -18575256 <2e-16 ***
## T.confirm -2.360e+14 2.592e+06 -91060651 <2e-16 ***
## T.will -7.102e+13 7.551e+05 -94048744 <2e-16 ***
## T.run -1.170e+14 3.642e+06 -32140488 <2e-16 ***
## T.fax -1.264e+14 1.692e+06 -74733304 <2e-16 ***
## T.work -9.181e+13 1.433e+06 -64081353 <2e-16 ***
## T.origin 2.369e+14 2.669e+06 88784138 <2e-16 ***
## T.problem 1.825e+13 3.111e+06 5867497 <2e-16 ***
## T.contact 1.011e+14 2.100e+06 48131813 <2e-16 ***
## T.next. 4.827e+13 2.419e+06 19953153 <2e-16 ***
## T.understand -1.098e+14 3.556e+06 -30860141 <2e-16 ***
## T.howev -3.231e+14 3.557e+06 -90847056 <2e-16 ***
## T.develop -3.427e+13 9.861e+05 -34751541 <2e-16 ***
## T.mention -3.044e+14 4.497e+06 -67681596 <2e-16 ***
## T.data -2.036e+14 1.731e+06 -117619237 <2e-16 ***
## T.issu -1.632e+14 2.534e+06 -64407287 <2e-16 ***
## T.sent -2.631e+14 2.808e+06 -93681756 <2e-16 ***
## T.togeth -1.554e+14 4.383e+06 -35460212 <2e-16 ***
## T.begin -5.551e+12 5.113e+06 -1085836 <2e-16 ***
## T.team -1.082e+14 2.051e+06 -52745100 <2e-16 ***
## T.deriv 3.605e+13 2.345e+06 15373063 <2e-16 ***
## T.present -3.394e+13 1.407e+06 -24124685 <2e-16 ***
## T.happi 3.887e+13 4.181e+06 9297033 <2e-16 ***
## T.date -8.807e+13 2.171e+06 -40562304 <2e-16 ***
## T.also 9.022e+13 2.153e+06 41897607 <2e-16 ***
## T.set -7.146e+13 3.103e+06 -23029147 <2e-16 ***
## T.tuesday -1.022e+14 3.281e+06 -31163758 <2e-16 ***
## T.mark -1.588e+14 1.955e+06 -81244317 <2e-16 ***
## T.soon -1.304e+14 4.001e+06 -32605099 <2e-16 ***
## T.last 2.560e+13 2.574e+06 9944005 <2e-16 ***
## T.request -2.000e+14 1.617e+06 -123658512 <2e-16 ***
## T.april -1.403e+14 2.434e+06 -57635845 <2e-16 ***
## T.risk 5.340e+12 1.098e+06 4864945 <2e-16 ***
## T.like 5.178e+13 1.747e+06 29647858 <2e-16 ***
## T.num.words.unq.log 1.853e+15 1.345e+07 137776659 <2e-16 ***
## T.invit 3.458e+13 2.541e+06 13608210 <2e-16 ***
## T.depart -1.669e+14 3.048e+06 -54767281 <2e-16 ***
## T.think -3.127e+14 2.371e+06 -131907185 <2e-16 ***
## T.analysi -2.796e+14 3.108e+06 -89948104 <2e-16 ***
## T.possibl -2.651e+14 3.214e+06 -82493743 <2e-16 ***
## T.week -6.201e+13 1.956e+06 -31695501 <2e-16 ***
## T.school -2.632e+14 2.473e+06 -106424817 <2e-16 ***
## T.london -1.331e+13 1.890e+06 -7039196 <2e-16 ***
## T.robert -2.506e+14 2.933e+06 -85431852 <2e-16 ***
## T.student -1.733e+14 1.998e+06 -86731928 <2e-16 ***
## T.call -5.434e+13 1.596e+06 -34042959 <2e-16 ***
## T.option -8.682e+13 1.302e+06 -66660806 <2e-16 ***
## T.confer -5.831e+13 1.355e+06 -43039366 <2e-16 ***
## T.dear -7.434e+13 3.446e+06 -21571355 <2e-16 ***
## T.edu -9.119e+13 8.434e+05 -108122115 <2e-16 ***
## T.wednesday -5.251e+13 3.500e+06 -15005574 <2e-16 ***
## T.arrang 1.081e+14 3.844e+06 28115293 <2e-16 ***
## T.manag -8.357e+12 1.360e+06 -6143877 <2e-16 ***
## T.ask -2.600e+13 2.461e+06 -10566815 <2e-16 ***
## T.energi -6.108e+13 1.042e+06 -58614290 <2e-16 ***
## T.financ -4.560e+13 2.036e+06 -22397384 <2e-16 ***
## T.doc -2.525e+14 2.349e+06 -107485485 <2e-16 ***
## T.suggest -2.712e+14 3.196e+06 -84845615 <2e-16 ***
## T.friday 9.781e+13 2.445e+06 40008997 <2e-16 ***
## T.resum -1.701e+14 1.811e+06 -93900836 <2e-16 ***
## T.thursday -9.654e+11 3.021e+06 -319555 <2e-16 ***
## T.john -6.501e+13 1.645e+06 -39528144 <2e-16 ***
## T.attend -2.146e+14 3.162e+06 -67869108 <2e-16 ***
## T.corp -2.851e+13 1.287e+06 -22156051 <2e-16 ***
## T.morn -5.903e+14 3.561e+06 -165749161 <2e-16 ***
## T.monday -1.878e+14 3.022e+06 -62163012 <2e-16 ***
## T.univers 4.406e+13 2.380e+06 18513349 <2e-16 ***
## T.appreci -1.301e+14 3.857e+06 -33721530 <2e-16 ***
## T.shall -2.308e+14 2.682e+06 -86058988 <2e-16 ***
## T.interview -1.710e+14 1.390e+06 -123018034 <2e-16 ***
## T.model -9.086e+13 1.073e+06 -84715925 <2e-16 ***
## T.houston -1.800e+14 1.245e+06 -144593075 <2e-16 ***
## T.question -4.387e+14 2.435e+06 -180174154 <2e-16 ***
## T.meet -6.486e+13 1.128e+06 -57490803 <2e-16 ***
## T.talk -1.155e+14 2.457e+06 -47018652 <2e-16 ***
## T.num.words.log -1.840e+15 1.773e+07 -103769285 <2e-16 ***
## T.num.chars.log 3.746e+14 1.396e+07 26844200 <2e-16 ***
## T.schedul -2.062e+14 2.090e+06 -98654182 <2e-16 ***
## T.group 1.441e+13 1.241e+06 11613629 <2e-16 ***
## T.discuss -1.425e+14 2.125e+06 -67046624 <2e-16 ***
## T.hou -3.366e+13 8.109e+05 -41508188 <2e-16 ***
## T.know 2.605e+13 2.370e+06 10992599 <2e-16 ***
## T.hope -2.684e+14 2.976e+06 -90180102 <2e-16 ***
## T.attach -1.690e+14 2.179e+06 -77570806 <2e-16 ***
## T.X2001 -2.128e+14 1.132e+06 -188032772 <2e-16 ***
## T.regard -1.255e+14 2.118e+06 -59256356 <2e-16 ***
## T.ect -2.404e+11 4.823e+05 -498587 <2e-16 ***
## T.research -2.114e+14 1.147e+06 -184384906 <2e-16 ***
## T.pleas -8.338e+13 1.390e+06 -59970539 <2e-16 ***
## T.forward -1.186e+14 1.566e+06 -75774357 <2e-16 ***
## T.let -2.059e+14 2.856e+06 -72098654 <2e-16 ***
## T.X2000 -2.574e+14 9.199e+05 -279809718 <2e-16 ***
## T.subject 1.078e+14 1.957e+06 55081391 <2e-16 ***
## T.thank -5.227e+14 1.576e+06 -331629488 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4409.5 on 4009 degrees of freedom
## Residual deviance: 6776.2 on 3682 degrees of freedom
## AIC: 7432.2
##
## Number of Fisher Scoring iterations: 25
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.glm.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Conditional.X.glm.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.9494080
## 3 0.2 0.9494080
## 4 0.3 0.9494080
## 5 0.4 0.9494080
## 6 0.5 0.9494080
## 7 0.6 0.9494080
## 8 0.7 0.9494080
## 9 0.8 0.9494080
## 10 0.9 0.9494080
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Reference
## Prediction N Y
## N 3034 76
## Y 18 882
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 3034
## 2 Y 76
## spam.fctr.predict.Conditional.X.glm.Y
## 1 18
## 2 882
## Prediction
## Reference N Y
## N 3034 18
## Y 76 882
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.765586e-01 9.341725e-01 9.713892e-01 9.810164e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 4.125004e-09
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.glm.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Conditional.X.glm.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.8657465
## 3 0.2 0.8657465
## 4 0.3 0.8657465
## 5 0.4 0.8657465
## 6 0.5 0.8657465
## 7 0.6 0.8657465
## 8 0.7 0.8657465
## 9 0.8 0.8657465
## 10 0.9 0.8657465
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Reference
## Prediction N Y
## N 1266 65
## Y 42 345
## spam.fctr spam.fctr.predict.Conditional.X.glm.N
## 1 N 1266
## 2 Y 65
## spam.fctr.predict.Conditional.X.glm.Y
## 1 42
## 2 345
## Prediction
## Reference N Y
## N 1266 42
## Y 65 345
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.377183e-01 8.252448e-01 9.252328e-01 9.486811e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 2.243066e-85 3.343502e-02
## model_id model_method
## 1 Conditional.X.glm glm
## feats
## 1 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, 
T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 51.673 17.404
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9573851 0.9 0.949408 0.9189498
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.9713892 0.9810164 0.7844885 0.9046767
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.9 0.8657465 0.9377183
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.9252328 0.9486811 0.8252448 7432.207
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.00487666 0.01175917
## [1] "fitting model: Conditional.X.rpart"
## [1] " indep_vars: T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, 
T.director, T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank"
## + Fold1: cp=0.02035
## - Fold1: cp=0.02035
## + Fold2: cp=0.02035
## - Fold2: cp=0.02035
## + Fold3: cp=0.02035
## - Fold3: cp=0.02035
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.0204 on full training set
## Warning in myfit_mdl(model_id = paste0(model_id_pfx, ""), model_method =
## method, : model's bestTune found at an extreme of tuneGrid for parameter:
## cp
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 4010
##
## CP nsplit rel error
## 1 0.05803758 0 1.0000000
## 2 0.05427975 6 0.6002088
## 3 0.02035491 7 0.5459290
##
## Variable importance
## T.thank T.X2000 T.ect T.hou T.X2001 T.subject T.life
## 17 12 10 10 8 8 6
## T.money T.offer T.remov T.let T.forward T.energi T.websit
## 6 5 3 3 3 1 1
## T.softwar T.thing T.messag T.invest T.corp
## 1 1 1 1 1
##
## Node number 1: 4010 observations, complexity param=0.05803758
## predicted class=N expected loss=0.2389027 P(node) =1
## class counts: 3052 958
## probabilities: 0.761 0.239
## left son=2 (1704 obs) right son=3 (2306 obs)
## Primary splits:
## T.thank < 0.5 to the right, improve=197.5233, (0 missing)
## T.X2000 < 0.5 to the right, improve=177.1062, (0 missing)
## T.ect < 0.5 to the right, improve=174.3138, (0 missing)
## T.subject < 1.5 to the right, improve=163.5183, (0 missing)
## T.hou < 0.5 to the right, improve=161.0811, (0 missing)
## Surrogate splits:
## T.subject < 1.5 to the right, agree=0.696, adj=0.285, (0 split)
## T.ect < 0.5 to the right, agree=0.678, adj=0.242, (0 split)
## T.hou < 0.5 to the right, agree=0.675, adj=0.236, (0 split)
## T.let < 0.5 to the right, agree=0.654, adj=0.185, (0 split)
## T.forward < 0.5 to the right, agree=0.653, adj=0.183, (0 split)
##
## Node number 2: 1704 observations
## predicted class=N expected loss=0.05633803 P(node) =0.4249377
## class counts: 1608 96
## probabilities: 0.944 0.056
##
## Node number 3: 2306 observations, complexity param=0.05803758
## predicted class=N expected loss=0.3738075 P(node) =0.5750623
## class counts: 1444 862
## probabilities: 0.626 0.374
## left son=6 (473 obs) right son=7 (1833 obs)
## Primary splits:
## T.X2000 < 0.5 to the right, improve=142.74190, (0 missing)
## T.ect < 0.5 to the right, improve=112.97750, (0 missing)
## T.life < 0.5 to the left, improve=110.07470, (0 missing)
## T.hou < 0.5 to the right, improve=103.61560, (0 missing)
## T.money < 0.5 to the left, improve= 95.05104, (0 missing)
## Surrogate splits:
## T.hou < 0.5 to the right, agree=0.846, adj=0.249, (0 split)
## T.ect < 0.5 to the right, agree=0.846, adj=0.247, (0 split)
## T.subject < 1.5 to the right, agree=0.817, adj=0.110, (0 split)
## T.corp < 0.5 to the right, agree=0.804, adj=0.042, (0 split)
## T.energi < 2.5 to the right, agree=0.803, adj=0.038, (0 split)
##
## Node number 6: 473 observations
## predicted class=N expected loss=0.02748414 P(node) =0.1179551
## class counts: 460 13
## probabilities: 0.973 0.027
##
## Node number 7: 1833 observations, complexity param=0.05803758
## predicted class=N expected loss=0.4631751 P(node) =0.4571072
## class counts: 984 849
## probabilities: 0.537 0.463
## left son=14 (246 obs) right son=15 (1587 obs)
## Primary splits:
## T.X2001 < 0.5 to the right, improve=91.92502, (0 missing)
## T.life < 0.5 to the left, improve=87.84373, (0 missing)
## T.research < 0.5 to the right, improve=80.56535, (0 missing)
## T.money < 0.5 to the left, improve=75.46572, (0 missing)
## T.offer < 0.5 to the left, improve=73.41183, (0 missing)
## Surrogate splits:
## T.ect < 0.5 to the right, agree=0.913, adj=0.350, (0 split)
## T.hou < 0.5 to the right, agree=0.912, adj=0.346, (0 split)
## T.subject < 1.5 to the right, agree=0.889, adj=0.175, (0 split)
## T.confer < 2.5 to the right, agree=0.872, adj=0.049, (0 split)
## T.energi < 1.5 to the right, agree=0.872, adj=0.049, (0 split)
##
## Node number 14: 246 observations
## predicted class=N expected loss=0.06097561 P(node) =0.06134663
## class counts: 231 15
## probabilities: 0.939 0.061
##
## Node number 15: 1587 observations, complexity param=0.05803758
## predicted class=Y expected loss=0.4744802 P(node) =0.3957606
## class counts: 753 834
## probabilities: 0.474 0.526
## left son=30 (1428 obs) right son=31 (159 obs)
## Primary splits:
## T.life < 0.5 to the left, improve=73.36123, (0 missing)
## T.research < 0.5 to the right, improve=71.02565, (0 missing)
## T.offer < 0.5 to the left, improve=63.17038, (0 missing)
## T.attach < 0.5 to the right, improve=61.88030, (0 missing)
## T.money < 0.5 to the left, improve=58.91211, (0 missing)
## Surrogate splits:
## T.messag < 2.5 to the left, agree=0.909, adj=0.094, (0 split)
## T.per < 1.5 to the left, agree=0.904, adj=0.044, (0 split)
## T.sever < 1.5 to the left, agree=0.904, adj=0.038, (0 split)
## T.sorri < 1.5 to the left, agree=0.904, adj=0.038, (0 split)
## T.now < 3.5 to the left, agree=0.902, adj=0.019, (0 split)
##
## Node number 30: 1428 observations, complexity param=0.05803758
## predicted class=N expected loss=0.4747899 P(node) =0.3561097
## class counts: 750 678
## probabilities: 0.525 0.475
## left son=60 (1279 obs) right son=61 (149 obs)
## Primary splits:
## T.money < 0.5 to the left, improve=65.78938, (0 missing)
## T.research < 0.5 to the right, improve=58.50566, (0 missing)
## T.offer < 0.5 to the left, improve=53.15943, (0 missing)
## T.websit < 0.5 to the left, improve=52.97876, (0 missing)
## T.attach < 0.5 to the right, improve=49.37902, (0 missing)
## Surrogate splits:
## T.websit < 5.5 to the left, agree=0.910, adj=0.141, (0 split)
## T.thing < 1.5 to the left, agree=0.909, adj=0.128, (0 split)
## T.invest < 0.5 to the left, agree=0.905, adj=0.094, (0 split)
## T.compani < 5.5 to the left, agree=0.901, adj=0.047, (0 split)
## T.buy < 1.5 to the left, agree=0.901, adj=0.047, (0 split)
##
## Node number 31: 159 observations
## predicted class=Y expected loss=0.01886792 P(node) =0.03965087
## class counts: 3 156
## probabilities: 0.019 0.981
##
## Node number 60: 1279 observations, complexity param=0.05803758
## predicted class=N expected loss=0.4229867 P(node) =0.3189526
## class counts: 738 541
## probabilities: 0.577 0.423
## left son=120 (1112 obs) right son=121 (167 obs)
## Primary splits:
## T.offer < 0.5 to the left, improve=58.84636, (0 missing)
## T.research < 0.5 to the right, improve=52.78682, (0 missing)
## T.click < 0.5 to the left, improve=48.99561, (0 missing)
## T.remov < 0.5 to the left, improve=45.85111, (0 missing)
## T.attach < 0.5 to the right, improve=39.78679, (0 missing)
## Surrogate splits:
## T.softwar < 1.5 to the left, agree=0.890, adj=0.156, (0 split)
## T.version < 1.5 to the left, agree=0.881, adj=0.090, (0 split)
## T.special < 1.5 to the left, agree=0.880, adj=0.078, (0 split)
## T.just < 1.5 to the left, agree=0.879, adj=0.072, (0 split)
## T.list < 2.5 to the left, agree=0.876, adj=0.054, (0 split)
##
## Node number 61: 149 observations
## predicted class=Y expected loss=0.08053691 P(node) =0.03715711
## class counts: 12 137
## probabilities: 0.081 0.919
##
## Node number 120: 1112 observations, complexity param=0.05427975
## predicted class=N expected loss=0.3642086 P(node) =0.2773067
## class counts: 707 405
## probabilities: 0.636 0.364
## left son=240 (1044 obs) right son=241 (68 obs)
## Primary splits:
## T.remov < 0.5 to the left, improve=38.89060, (0 missing)
## T.research < 0.5 to the right, improve=38.85484, (0 missing)
## T.click < 0.5 to the left, improve=37.02807, (0 missing)
## T.onlin < 0.5 to the left, improve=32.06984, (0 missing)
## T.has.http < 0.5 to the left, improve=31.36534, (0 missing)
## Surrogate splits:
## T.name < 5 to the left, agree=0.942, adj=0.059, (0 split)
## T.free < 3.5 to the left, agree=0.941, adj=0.029, (0 split)
## T.confirm < 3.5 to the left, agree=0.941, adj=0.029, (0 split)
## T.special < 2.5 to the left, agree=0.940, adj=0.015, (0 split)
## T.mail < 3.5 to the left, agree=0.940, adj=0.015, (0 split)
##
## Node number 121: 167 observations
## predicted class=Y expected loss=0.1856287 P(node) =0.04164589
## class counts: 31 136
## probabilities: 0.186 0.814
##
## Node number 240: 1044 observations
## predicted class=N expected loss=0.3304598 P(node) =0.2603491
## class counts: 699 345
## probabilities: 0.670 0.330
##
## Node number 241: 68 observations
## predicted class=Y expected loss=0.1176471 P(node) =0.01695761
## class counts: 8 60
## probabilities: 0.118 0.882
##
## n= 4010
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 4010 958 N (0.76109726 0.23890274)
## 2) T.thank>=0.5 1704 96 N (0.94366197 0.05633803) *
## 3) T.thank< 0.5 2306 862 N (0.62619254 0.37380746)
## 6) T.X2000>=0.5 473 13 N (0.97251586 0.02748414) *
## 7) T.X2000< 0.5 1833 849 N (0.53682488 0.46317512)
## 14) T.X2001>=0.5 246 15 N (0.93902439 0.06097561) *
## 15) T.X2001< 0.5 1587 753 Y (0.47448015 0.52551985)
## 30) T.life< 0.5 1428 678 N (0.52521008 0.47478992)
## 60) T.money< 0.5 1279 541 N (0.57701329 0.42298671)
## 120) T.offer< 0.5 1112 405 N (0.63579137 0.36420863)
## 240) T.remov< 0.5 1044 345 N (0.66954023 0.33045977) *
## 241) T.remov>=0.5 68 8 Y (0.11764706 0.88235294) *
## 121) T.offer>=0.5 167 31 Y (0.18562874 0.81437126) *
## 61) T.money>=0.5 149 12 Y (0.08053691 0.91946309) *
## 31) T.life>=0.5 159 3 Y (0.01886792 0.98113208) *
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 2299 124
## Y 753 834
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2299
## 2 Y 124
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 753
## 2 834
## Reference
## Prediction N Y
## N 2299 124
## Y 753 834
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2299
## 2 Y 124
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 753
## 2 834
## Reference
## Prediction N Y
## N 2299 124
## Y 753 834
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2299
## 2 Y 124
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 753
## 2 834
## Reference
## Prediction N Y
## N 2998 469
## Y 54 489
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2998
## 2 Y 469
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 54
## 2 489
## Reference
## Prediction N Y
## N 2998 469
## Y 54 489
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2998
## 2 Y 469
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 54
## 2 489
## Reference
## Prediction N Y
## N 2998 469
## Y 54 489
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2998
## 2 Y 469
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 54
## 2 489
## Reference
## Prediction N Y
## N 2998 469
## Y 54 489
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2998
## 2 Y 469
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 54
## 2 489
## Reference
## Prediction N Y
## N 2998 469
## Y 54 489
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2998
## 2 Y 469
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 54
## 2 489
## Reference
## Prediction N Y
## N 3037 665
## Y 15 293
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 3037
## 2 Y 665
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 15
## 2 293
## Reference
## Prediction N Y
## N 3052 958
## Y 0 0
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 3052
## 2 Y 958
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.6554028
## 3 0.2 0.6554028
## 4 0.3 0.6554028
## 5 0.4 0.6515656
## 6 0.5 0.6515656
## 7 0.6 0.6515656
## 8 0.7 0.6515656
## 9 0.8 0.6515656
## 10 0.9 0.4628752
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.3000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2299
## 2 Y 124
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 753
## 2 834
## Reference
## Prediction N Y
## N 2299 124
## Y 753 834
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 2299
## 2 Y 124
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 753
## 2 834
## Prediction
## Reference N Y
## N 2299 753
## Y 124 834
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.812968e-01 5.091572e-01 7.681753e-01 7.940070e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 1.315573e-03 8.396989e-100
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 971 48
## Y 337 362
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 971
## 2 Y 48
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 337
## 2 362
## Reference
## Prediction N Y
## N 971 48
## Y 337 362
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 971
## 2 Y 48
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 337
## 2 362
## Reference
## Prediction N Y
## N 971 48
## Y 337 362
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 971
## 2 Y 48
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 337
## 2 362
## Reference
## Prediction N Y
## N 1283 222
## Y 25 188
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 1283
## 2 Y 222
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 25
## 2 188
## Reference
## Prediction N Y
## N 1283 222
## Y 25 188
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 1283
## 2 Y 222
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 25
## 2 188
## Reference
## Prediction N Y
## N 1283 222
## Y 25 188
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 1283
## 2 Y 222
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 25
## 2 188
## Reference
## Prediction N Y
## N 1283 222
## Y 25 188
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 1283
## 2 Y 222
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 25
## 2 188
## Reference
## Prediction N Y
## N 1283 222
## Y 25 188
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 1283
## 2 Y 222
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 25
## 2 188
## Reference
## Prediction N Y
## N 1300 302
## Y 8 108
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 1300
## 2 Y 302
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 8
## 2 108
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.6528404
## 3 0.2 0.6528404
## 4 0.3 0.6528404
## 5 0.4 0.6035313
## 6 0.5 0.6035313
## 7 0.6 0.6035313
## 8 0.7 0.6035313
## 9 0.8 0.6035313
## 10 0.9 0.4106464
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.3000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 971
## 2 Y 48
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 337
## 2 362
## Reference
## Prediction N Y
## N 971 48
## Y 337 362
## spam.fctr spam.fctr.predict.Conditional.X.rpart.N
## 1 N 971
## 2 Y 48
## spam.fctr.predict.Conditional.X.rpart.Y
## 1 337
## 2 362
## Prediction
## Reference N Y
## N 971 337
## Y 48 362
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.759022e-01 5.034615e-01 7.554310e-01 7.954265e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 8.204533e-02 8.939412e-49
## model_id model_method
## 1 Conditional.X.rpart rpart
## feats
## 1 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 17.521 3.007
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.8739962 0.3 0.6554028 0.8725535
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7681753 0.794007 0.6153166 0.8629279
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.3 0.6528404 0.7759022
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.755431 0.7954265 0.5034615
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.02738113 0.08595926
## [1] "fitting model: Conditional.X.cp.0.rpart"
## [1] " indep_vars: T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, 
T.director, T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank"
## Fitting cp = 0 on full training set
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 4010
##
## CP nsplit rel error
## 1 0.0580375783 0 1.0000000
## 2 0.0542797495 6 0.6002088
## 3 0.0203549061 7 0.5459290
## 4 0.0187891441 13 0.3977035
## 5 0.0114822547 14 0.3789144
## 6 0.0093945720 16 0.3559499
## 7 0.0083507307 18 0.3371608
## 8 0.0073068894 19 0.3288100
## 9 0.0067849687 21 0.3141962
## 10 0.0052192067 24 0.2912317
## 11 0.0041753653 25 0.2860125
## 12 0.0031315240 28 0.2734864
## 13 0.0020876827 33 0.2578288
## 14 0.0010438413 37 0.2494781
## 15 0.0002609603 46 0.2400835
## 16 0.0000000000 55 0.2369520
##
## Variable importance
## T.thank T.X2000 T.ect T.hou
## 10 7 6 6
## T.subject T.X2001 T.money T.life
## 5 5 4 4
## T.remov T.offer T.forward T.let
## 4 3 2 2
## T.research T.compani T.account T.websit
## 2 2 1 1
## T.onlin T.com T.pleas T.custom
## 1 1 1 1
## T.softwar T.energi T.num.chars T.messag
## 1 1 1 1
## T.num.words T.num.words.log T.click T.num.words.unq
## 1 1 1 1
## T.will T.invest T.num.chars.log T.email
## 1 1 1 1
## T.provid T.receiv T.secur T.free
## 1 1 1 1
## T.model T.market T.much T.thing
## 1 1 1 1
## T.effect
## 1
##
## Node number 1: 4010 observations, complexity param=0.05803758
## predicted class=N expected loss=0.2389027 P(node) =1
## class counts: 3052 958
## probabilities: 0.761 0.239
## left son=2 (1704 obs) right son=3 (2306 obs)
## Primary splits:
## T.thank < 0.5 to the right, improve=197.5233, (0 missing)
## T.X2000 < 0.5 to the right, improve=177.1062, (0 missing)
## T.ect < 0.5 to the right, improve=174.3138, (0 missing)
## T.subject < 1.5 to the right, improve=163.5183, (0 missing)
## T.hou < 0.5 to the right, improve=161.0811, (0 missing)
## Surrogate splits:
## T.subject < 1.5 to the right, agree=0.696, adj=0.285, (0 split)
## T.ect < 0.5 to the right, agree=0.678, adj=0.242, (0 split)
## T.hou < 0.5 to the right, agree=0.675, adj=0.236, (0 split)
## T.let < 0.5 to the right, agree=0.654, adj=0.185, (0 split)
## T.forward < 0.5 to the right, agree=0.653, adj=0.183, (0 split)
##
## Node number 2: 1704 observations, complexity param=0.01148225
## predicted class=N expected loss=0.05633803 P(node) =0.4249377
## class counts: 1608 96
## probabilities: 0.944 0.056
## left son=4 (1655 obs) right son=5 (49 obs)
## Primary splits:
## T.remov < 0.5 to the left, improve=31.18184, (0 missing)
## T.money < 2.5 to the left, improve=30.58203, (0 missing)
## T.click < 0.5 to the left, improve=27.27076, (0 missing)
## T.account < 3.5 to the left, improve=23.72691, (0 missing)
## T.free < 2.5 to the left, improve=19.87392, (0 missing)
## Surrogate splits:
## T.free < 2.5 to the left, agree=0.975, adj=0.143, (0 split)
## T.receiv < 11 to the left, agree=0.974, adj=0.102, (0 split)
## T.within < 4.5 to the left, agree=0.974, adj=0.102, (0 split)
## T.address < 10 to the left, agree=0.974, adj=0.102, (0 split)
## T.X000 < 20.5 to the left, agree=0.974, adj=0.102, (0 split)
##
## Node number 3: 2306 observations, complexity param=0.05803758
## predicted class=N expected loss=0.3738075 P(node) =0.5750623
## class counts: 1444 862
## probabilities: 0.626 0.374
## left son=6 (473 obs) right son=7 (1833 obs)
## Primary splits:
## T.X2000 < 0.5 to the right, improve=142.74190, (0 missing)
## T.ect < 0.5 to the right, improve=112.97750, (0 missing)
## T.life < 0.5 to the left, improve=110.07470, (0 missing)
## T.hou < 0.5 to the right, improve=103.61560, (0 missing)
## T.money < 0.5 to the left, improve= 95.05104, (0 missing)
## Surrogate splits:
## T.hou < 0.5 to the right, agree=0.846, adj=0.249, (0 split)
## T.ect < 0.5 to the right, agree=0.846, adj=0.247, (0 split)
## T.subject < 1.5 to the right, agree=0.817, adj=0.110, (0 split)
## T.corp < 0.5 to the right, agree=0.804, adj=0.042, (0 split)
## T.energi < 2.5 to the right, agree=0.803, adj=0.038, (0 split)
##
## Node number 4: 1655 observations, complexity param=0.01148225
## predicted class=N expected loss=0.03987915 P(node) =0.4127182
## class counts: 1589 66
## probabilities: 0.960 0.040
## left son=8 (1638 obs) right son=9 (17 obs)
## Primary splits:
## T.account < 3.5 to the left, improve=21.096360, (0 missing)
## T.money < 2.5 to the left, improve=16.683700, (0 missing)
## T.compani < 3.5 to the left, improve= 8.127608, (0 missing)
## T.secur < 0.5 to the left, improve= 8.084898, (0 missing)
## T.click < 0.5 to the left, improve= 8.060507, (0 missing)
## Surrogate splits:
## T.includ < 7.5 to the left, agree=0.992, adj=0.176, (0 split)
## T.cost < 8.5 to the left, agree=0.991, adj=0.118, (0 split)
## T.current < 6.5 to the left, agree=0.991, adj=0.118, (0 split)
##
## Node number 5: 49 observations, complexity param=0.009394572
## predicted class=Y expected loss=0.3877551 P(node) =0.01221945
## class counts: 19 30
## probabilities: 0.388 0.612
## left son=10 (25 obs) right son=11 (24 obs)
## Primary splits:
## T.receiv < 0.5 to the left, improve=8.718639, (0 missing)
## T.hou < 0.5 to the right, improve=8.265306, (0 missing)
## T.ect < 1 to the right, improve=8.265306, (0 missing)
## T.subject < 2.5 to the right, improve=7.710282, (0 missing)
## T.email < 0.5 to the left, improve=7.563552, (0 missing)
## Surrogate splits:
## T.list < 0.5 to the left, agree=0.776, adj=0.542, (0 split)
## T.wish < 0.5 to the left, agree=0.755, adj=0.500, (0 split)
## T.email < 0.5 to the left, agree=0.755, adj=0.500, (0 split)
## T.product < 0.5 to the left, agree=0.755, adj=0.500, (0 split)
## T.messag < 0.5 to the left, agree=0.755, adj=0.500, (0 split)
##
## Node number 6: 473 observations, complexity param=0.001043841
## predicted class=N expected loss=0.02748414 P(node) =0.1179551
## class counts: 460 13
## probabilities: 0.973 0.027
## left son=12 (450 obs) right son=13 (23 obs)
## Primary splits:
## T.remov < 0.5 to the left, improve=4.961741, (0 missing)
## T.email < 3.5 to the left, improve=4.204480, (0 missing)
## T.money < 0.5 to the left, improve=3.290745, (0 missing)
## T.mail < 2.5 to the left, improve=1.841953, (0 missing)
## T.address < 1.5 to the left, improve=1.841953, (0 missing)
## Surrogate splits:
## T.email < 7 to the left, agree=0.958, adj=0.130, (0 split)
## T.servic < 11 to the left, agree=0.958, adj=0.130, (0 split)
## T.address < 5.5 to the left, agree=0.956, adj=0.087, (0 split)
## T.allow < 4.5 to the left, agree=0.956, adj=0.087, (0 split)
## T.file < 3.5 to the left, agree=0.956, adj=0.087, (0 split)
##
## Node number 7: 1833 observations, complexity param=0.05803758
## predicted class=N expected loss=0.4631751 P(node) =0.4571072
## class counts: 984 849
## probabilities: 0.537 0.463
## left son=14 (246 obs) right son=15 (1587 obs)
## Primary splits:
## T.X2001 < 0.5 to the right, improve=91.92502, (0 missing)
## T.life < 0.5 to the left, improve=87.84373, (0 missing)
## T.research < 0.5 to the right, improve=80.56535, (0 missing)
## T.money < 0.5 to the left, improve=75.46572, (0 missing)
## T.offer < 0.5 to the left, improve=73.41183, (0 missing)
## Surrogate splits:
## T.ect < 0.5 to the right, agree=0.913, adj=0.350, (0 split)
## T.hou < 0.5 to the right, agree=0.912, adj=0.346, (0 split)
## T.subject < 1.5 to the right, agree=0.889, adj=0.175, (0 split)
## T.confer < 2.5 to the right, agree=0.872, adj=0.049, (0 split)
## T.energi < 1.5 to the right, agree=0.872, adj=0.049, (0 split)
##
## Node number 8: 1638 observations, complexity param=0.008350731
## predicted class=N expected loss=0.03174603 P(node) =0.4084788
## class counts: 1586 52
## probabilities: 0.968 0.032
## left son=16 (1630 obs) right son=17 (8 obs)
## Primary splits:
## T.money < 2.5 to the left, improve=15.073870, (0 missing)
## T.compani < 3.5 to the left, improve= 7.972490, (0 missing)
## T.now < 2.5 to the left, improve= 5.727125, (0 missing)
## T.net < 0.5 to the left, improve= 5.693787, (0 missing)
## T.custom < 1.5 to the left, improve= 5.145175, (0 missing)
## Surrogate splits:
## T.X000 < 10.5 to the left, agree=0.996, adj=0.250, (0 split)
## T.make < 5.5 to the left, agree=0.996, adj=0.250, (0 split)
## T.life < 2.5 to the left, agree=0.996, adj=0.125, (0 split)
## T.now < 4.5 to the left, agree=0.996, adj=0.125, (0 split)
## T.success < 2.5 to the left, agree=0.996, adj=0.125, (0 split)
##
## Node number 9: 17 observations
## predicted class=Y expected loss=0.1764706 P(node) =0.004239401
## class counts: 3 14
## probabilities: 0.176 0.824
##
## Node number 10: 25 observations, complexity param=0.003131524
## predicted class=N expected loss=0.32 P(node) =0.006234414
## class counts: 17 8
## probabilities: 0.680 0.320
## left son=20 (12 obs) right son=21 (13 obs)
## Primary splits:
## T.let < 0.5 to the right, improve=4.726154, (0 missing)
## T.email < 0.5 to the left, improve=3.380000, (0 missing)
## T.compani < 0.5 to the left, improve=3.022857, (0 missing)
## T.address < 0.5 to the left, improve=3.022857, (0 missing)
## T.price < 0.5 to the right, improve=2.880000, (0 missing)
## Surrogate splits:
## T.email < 0.5 to the left, agree=0.84, adj=0.667, (0 split)
## T.mail < 0.5 to the left, agree=0.80, adj=0.583, (0 split)
## T.price < 0.5 to the right, agree=0.80, adj=0.583, (0 split)
## T.know < 0.5 to the right, agree=0.80, adj=0.583, (0 split)
## T.compani < 0.5 to the left, agree=0.76, adj=0.500, (0 split)
##
## Node number 11: 24 observations
## predicted class=Y expected loss=0.08333333 P(node) =0.005985037
## class counts: 2 22
## probabilities: 0.083 0.917
##
## Node number 12: 450 observations
## predicted class=N expected loss=0.01111111 P(node) =0.1122195
## class counts: 445 5
## probabilities: 0.989 0.011
##
## Node number 13: 23 observations, complexity param=0.001043841
## predicted class=N expected loss=0.3478261 P(node) =0.005735661
## class counts: 15 8
## probabilities: 0.652 0.348
## left son=26 (9 obs) right son=27 (14 obs)
## Primary splits:
## T.forward < 0.5 to the right, improve=3.577640, (0 missing)
## T.X2000 < 1.5 to the right, improve=3.510540, (0 missing)
## T.data < 0.5 to the right, improve=2.968116, (0 missing)
## T.energi < 0.5 to the right, improve=2.968116, (0 missing)
## T.hou < 0.5 to the right, improve=2.968116, (0 missing)
## Surrogate splits:
## T.hou < 0.5 to the right, agree=0.870, adj=0.667, (0 split)
## T.subject < 2.5 to the right, agree=0.870, adj=0.667, (0 split)
## T.ect < 0.5 to the right, agree=0.826, adj=0.556, (0 split)
## T.valu < 0.5 to the right, agree=0.783, adj=0.444, (0 split)
## T.pleas < 1.5 to the right, agree=0.783, adj=0.444, (0 split)
##
## Node number 14: 246 observations, complexity param=0.007306889
## predicted class=N expected loss=0.06097561 P(node) =0.06134663
## class counts: 231 15
## probabilities: 0.939 0.061
## left son=28 (239 obs) right son=29 (7 obs)
## Primary splits:
## T.secur < 2.5 to the left, improve=12.706300, (0 missing)
## T.without < 1.5 to the left, improve=10.958550, (0 missing)
## T.compani < 5.5 to the left, improve=10.958550, (0 missing)
## T.net < 0.5 to the left, improve=10.224120, (0 missing)
## T.invest < 1.5 to the left, improve= 9.599705, (0 missing)
## Surrogate splits:
## T.invest < 3 to the left, agree=0.996, adj=0.857, (0 split)
## T.compani < 5.5 to the left, agree=0.988, adj=0.571, (0 split)
## T.buy < 1.5 to the left, agree=0.984, adj=0.429, (0 split)
## T.high < 1.5 to the left, agree=0.984, adj=0.429, (0 split)
## T.sever < 1.5 to the left, agree=0.984, adj=0.429, (0 split)
##
## Node number 15: 1587 observations, complexity param=0.05803758
## predicted class=Y expected loss=0.4744802 P(node) =0.3957606
## class counts: 753 834
## probabilities: 0.474 0.526
## left son=30 (1428 obs) right son=31 (159 obs)
## Primary splits:
## T.life < 0.5 to the left, improve=73.36123, (0 missing)
## T.research < 0.5 to the right, improve=71.02565, (0 missing)
## T.offer < 0.5 to the left, improve=63.17038, (0 missing)
## T.attach < 0.5 to the right, improve=61.88030, (0 missing)
## T.money < 0.5 to the left, improve=58.91211, (0 missing)
## Surrogate splits:
## T.messag < 2.5 to the left, agree=0.909, adj=0.094, (0 split)
## T.per < 1.5 to the left, agree=0.904, adj=0.044, (0 split)
## T.sever < 1.5 to the left, agree=0.904, adj=0.038, (0 split)
## T.sorri < 1.5 to the left, agree=0.904, adj=0.038, (0 split)
## T.now < 3.5 to the left, agree=0.902, adj=0.019, (0 split)
##
## Node number 16: 1630 observations, complexity param=0.003131524
## predicted class=N expected loss=0.02699387 P(node) =0.4064838
## class counts: 1586 44
## probabilities: 0.973 0.027
## left son=32 (1621 obs) right son=33 (9 obs)
## Primary splits:
## T.compani < 7.5 to the left, improve=7.406156, (0 missing)
## T.money < 0.5 to the left, improve=4.624848, (0 missing)
## T.net < 0.5 to the left, improve=4.482077, (0 missing)
## T.custom < 1.5 to the left, improve=4.382554, (0 missing)
## T.increas < 0.5 to the left, improve=3.426642, (0 missing)
## Surrogate splits:
## T.corpor < 5 to the left, agree=0.996, adj=0.333, (0 split)
## T.product < 4.5 to the left, agree=0.995, adj=0.111, (0 split)
## T.student < 9 to the left, agree=0.995, adj=0.111, (0 split)
##
## Node number 17: 8 observations
## predicted class=Y expected loss=0 P(node) =0.001995012
## class counts: 0 8
## probabilities: 0.000 1.000
##
## Node number 20: 12 observations
## predicted class=N expected loss=0 P(node) =0.002992519
## class counts: 12 0
## probabilities: 1.000 0.000
##
## Node number 21: 13 observations
## predicted class=Y expected loss=0.3846154 P(node) =0.003241895
## class counts: 5 8
## probabilities: 0.385 0.615
##
## Node number 26: 9 observations
## predicted class=N expected loss=0 P(node) =0.002244389
## class counts: 9 0
## probabilities: 1.000 0.000
##
## Node number 27: 14 observations
## predicted class=Y expected loss=0.4285714 P(node) =0.003491272
## class counts: 6 8
## probabilities: 0.429 0.571
##
## Node number 28: 239 observations, complexity param=0.001043841
## predicted class=N expected loss=0.0334728 P(node) =0.059601
## class counts: 231 8
## probabilities: 0.967 0.033
## left son=56 (232 obs) right son=57 (7 obs)
## Primary splits:
## T.net < 0.5 to the left, improve=4.173795, (0 missing)
## T.internet < 1.5 to the left, improve=4.173795, (0 missing)
## T.email < 2.5 to the left, improve=4.088837, (0 missing)
## T.free < 1.5 to the left, improve=3.602963, (0 missing)
## T.want < 2.5 to the left, improve=3.602963, (0 missing)
## Surrogate splits:
## T.free < 4 to the left, agree=0.983, adj=0.429, (0 split)
## T.email < 4 to the left, agree=0.983, adj=0.429, (0 split)
## T.site < 3 to the left, agree=0.983, adj=0.429, (0 split)
## T.http < 11 to the left, agree=0.983, adj=0.429, (0 split)
## T.click < 2.5 to the left, agree=0.979, adj=0.286, (0 split)
##
## Node number 29: 7 observations
## predicted class=Y expected loss=0 P(node) =0.001745636
## class counts: 0 7
## probabilities: 0.000 1.000
##
## Node number 30: 1428 observations, complexity param=0.05803758
## predicted class=N expected loss=0.4747899 P(node) =0.3561097
## class counts: 750 678
## probabilities: 0.525 0.475
## left son=60 (1279 obs) right son=61 (149 obs)
## Primary splits:
## T.money < 0.5 to the left, improve=65.78938, (0 missing)
## T.research < 0.5 to the right, improve=58.50566, (0 missing)
## T.offer < 0.5 to the left, improve=53.15943, (0 missing)
## T.websit < 0.5 to the left, improve=52.97876, (0 missing)
## T.attach < 0.5 to the right, improve=49.37902, (0 missing)
## Surrogate splits:
## T.websit < 5.5 to the left, agree=0.910, adj=0.141, (0 split)
## T.thing < 1.5 to the left, agree=0.909, adj=0.128, (0 split)
## T.invest < 0.5 to the left, agree=0.905, adj=0.094, (0 split)
## T.compani < 5.5 to the left, agree=0.901, adj=0.047, (0 split)
## T.buy < 1.5 to the left, agree=0.901, adj=0.047, (0 split)
##
## Node number 31: 159 observations
## predicted class=Y expected loss=0.01886792 P(node) =0.03965087
## class counts: 3 156
## probabilities: 0.019 0.981
##
## Node number 32: 1621 observations, complexity param=0.002087683
## predicted class=N expected loss=0.02344232 P(node) =0.4042394
## class counts: 1583 38
## probabilities: 0.977 0.023
## left son=64 (1578 obs) right son=65 (43 obs)
## Primary splits:
## T.money < 0.5 to the left, improve=4.770245, (0 missing)
## T.has.http < 0.5 to the left, improve=2.989141, (0 missing)
## T.http < 0.5 to the left, improve=2.989141, (0 missing)
## T.net < 0.5 to the left, improve=2.729713, (0 missing)
## T.X000 < 0.5 to the left, improve=2.475313, (0 missing)
## Surrogate splits:
## T.can < 9.5 to the left, agree=0.975, adj=0.070, (0 split)
## T.just < 5.5 to the left, agree=0.975, adj=0.047, (0 split)
## T.way < 3.5 to the left, agree=0.975, adj=0.047, (0 split)
## T.event < 7.5 to the left, agree=0.975, adj=0.047, (0 split)
## T.end < 4.5 to the left, agree=0.975, adj=0.047, (0 split)
##
## Node number 33: 9 observations
## predicted class=Y expected loss=0.3333333 P(node) =0.002244389
## class counts: 3 6
## probabilities: 0.333 0.667
##
## Node number 56: 232 observations
## predicted class=N expected loss=0.01724138 P(node) =0.05785536
## class counts: 228 4
## probabilities: 0.983 0.017
##
## Node number 57: 7 observations
## predicted class=Y expected loss=0.4285714 P(node) =0.001745636
## class counts: 3 4
## probabilities: 0.429 0.571
##
## Node number 60: 1279 observations, complexity param=0.05803758
## predicted class=N expected loss=0.4229867 P(node) =0.3189526
## class counts: 738 541
## probabilities: 0.577 0.423
## left son=120 (1112 obs) right son=121 (167 obs)
## Primary splits:
## T.offer < 0.5 to the left, improve=58.84636, (0 missing)
## T.research < 0.5 to the right, improve=52.78682, (0 missing)
## T.click < 0.5 to the left, improve=48.99561, (0 missing)
## T.remov < 0.5 to the left, improve=45.85111, (0 missing)
## T.attach < 0.5 to the right, improve=39.78679, (0 missing)
## Surrogate splits:
## T.softwar < 1.5 to the left, agree=0.890, adj=0.156, (0 split)
## T.version < 1.5 to the left, agree=0.881, adj=0.090, (0 split)
## T.special < 1.5 to the left, agree=0.880, adj=0.078, (0 split)
## T.just < 1.5 to the left, agree=0.879, adj=0.072, (0 split)
## T.list < 2.5 to the left, agree=0.876, adj=0.054, (0 split)
##
## Node number 61: 149 observations, complexity param=0.003131524
## predicted class=Y expected loss=0.08053691 P(node) =0.03715711
## class counts: 12 137
## probabilities: 0.081 0.919
## left son=122 (9 obs) right son=123 (140 obs)
## Primary splits:
## T.ask < 0.5 to the right, improve=6.581400, (0 missing)
## T.long < 1.5 to the right, improve=5.900112, (0 missing)
## T.contract < 1.5 to the right, improve=5.900112, (0 missing)
## T.case < 0.5 to the right, improve=5.900112, (0 missing)
## T.will < 9.5 to the right, improve=5.900112, (0 missing)
## Surrogate splits:
## T.state < 5.5 to the right, agree=0.973, adj=0.556, (0 split)
## T.long < 3 to the right, agree=0.973, adj=0.556, (0 split)
## T.contract < 1.5 to the right, agree=0.973, adj=0.556, (0 split)
## T.power < 2.5 to the right, agree=0.973, adj=0.556, (0 split)
## T.will < 9.5 to the right, agree=0.973, adj=0.556, (0 split)
##
## Node number 64: 1578 observations, complexity param=0.001043841
## predicted class=N expected loss=0.01711027 P(node) =0.3935162
## class counts: 1551 27
## probabilities: 0.983 0.017
## left son=128 (1500 obs) right son=129 (78 obs)
## Primary splits:
## T.link < 0.5 to the left, improve=1.584969, (0 missing)
## T.has.http < 0.5 to the left, improve=1.500919, (0 missing)
## T.http < 0.5 to the left, improve=1.500919, (0 missing)
## T.net < 0.5 to the left, improve=1.484907, (0 missing)
## T.click < 0.5 to the left, improve=1.287377, (0 missing)
## Surrogate splits:
## T.www < 2.5 to the left, agree=0.955, adj=0.090, (0 split)
## T.much < 3.5 to the left, agree=0.953, adj=0.051, (0 split)
## T.know < 7.5 to the left, agree=0.953, adj=0.051, (0 split)
## T.best < 4.5 to the left, agree=0.952, adj=0.038, (0 split)
## T.date < 5.5 to the left, agree=0.952, adj=0.038, (0 split)
##
## Node number 65: 43 observations, complexity param=0.002087683
## predicted class=N expected loss=0.255814 P(node) =0.01072319
## class counts: 32 11
## probabilities: 0.744 0.256
## left son=130 (25 obs) right son=131 (18 obs)
## Primary splits:
## T.subject < 1.5 to the right, improve=7.816537, (0 missing)
## T.has.http < 0.5 to the left, improve=5.141790, (0 missing)
## T.http < 0.5 to the left, improve=5.141790, (0 missing)
## T.know < 0.5 to the right, improve=4.455426, (0 missing)
## T.forward < 0.5 to the right, improve=3.986812, (0 missing)
## Surrogate splits:
## T.forward < 0.5 to the right, agree=0.837, adj=0.611, (0 split)
## T.X2001 < 0.5 to the right, agree=0.791, adj=0.500, (0 split)
## T.num.chars < 988 to the right, agree=0.767, adj=0.444, (0 split)
## T.num.words < 87.5 to the right, agree=0.767, adj=0.444, (0 split)
## T.num.words.log < 4.482859 to the right, agree=0.767, adj=0.444, (0 split)
##
## Node number 120: 1112 observations, complexity param=0.05427975
## predicted class=N expected loss=0.3642086 P(node) =0.2773067
## class counts: 707 405
## probabilities: 0.636 0.364
## left son=240 (1044 obs) right son=241 (68 obs)
## Primary splits:
## T.remov < 0.5 to the left, improve=38.89060, (0 missing)
## T.research < 0.5 to the right, improve=38.85484, (0 missing)
## T.click < 0.5 to the left, improve=37.02807, (0 missing)
## T.onlin < 0.5 to the left, improve=32.06984, (0 missing)
## T.has.http < 0.5 to the left, improve=31.36534, (0 missing)
## Surrogate splits:
## T.name < 5 to the left, agree=0.942, adj=0.059, (0 split)
## T.free < 3.5 to the left, agree=0.941, adj=0.029, (0 split)
## T.confirm < 3.5 to the left, agree=0.941, adj=0.029, (0 split)
## T.special < 2.5 to the left, agree=0.940, adj=0.015, (0 split)
## T.mail < 3.5 to the left, agree=0.940, adj=0.015, (0 split)
##
## Node number 121: 167 observations, complexity param=0.009394572
## predicted class=Y expected loss=0.1856287 P(node) =0.04164589
## class counts: 31 136
## probabilities: 0.186 0.814
## left son=242 (9 obs) right son=243 (158 obs)
## Primary splits:
## T.model < 0.5 to the right, improve=12.617600, (0 missing)
## T.energi < 0.5 to the right, improve=10.856620, (0 missing)
## T.either < 0.5 to the right, improve= 9.423419, (0 missing)
## T.fax < 0.5 to the right, improve= 9.423419, (0 missing)
## T.discuss < 0.5 to the right, improve= 9.423419, (0 missing)
## Surrogate splits:
## T.provid < 3.5 to the right, agree=0.958, adj=0.222, (0 split)
## T.short < 1.5 to the right, agree=0.958, adj=0.222, (0 split)
## T.set < 2.5 to the right, agree=0.958, adj=0.222, (0 split)
## T.X000 < 2.5 to the right, agree=0.952, adj=0.111, (0 split)
## T.new < 4.5 to the right, agree=0.952, adj=0.111, (0 split)
##
## Node number 122: 9 observations
## predicted class=N expected loss=0.3333333 P(node) =0.002244389
## class counts: 6 3
## probabilities: 0.667 0.333
##
## Node number 123: 140 observations
## predicted class=Y expected loss=0.04285714 P(node) =0.03491272
## class counts: 6 134
## probabilities: 0.043 0.957
##
## Node number 128: 1500 observations, complexity param=0.001043841
## predicted class=N expected loss=0.012 P(node) =0.3740648
## class counts: 1482 18
## probabilities: 0.988 0.012
## left son=256 (1452 obs) right son=257 (48 obs)
## Primary splits:
## T.net < 0.5 to the left, improve=1.2663470, (0 missing)
## T.allow < 0.5 to the left, improve=0.8127361, (0 missing)
## T.per < 2.5 to the left, improve=0.8002826, (0 missing)
## T.rate < 0.5 to the left, improve=0.6794272, (0 missing)
## T.has.http < 0.5 to the left, improve=0.6615622, (0 missing)
## Surrogate splits:
## T.might < 4.5 to the left, agree=0.971, adj=0.083, (0 split)
## T.life < 1.5 to the left, agree=0.970, adj=0.062, (0 split)
## T.communic < 20 to the left, agree=0.970, adj=0.062, (0 split)
## T.direct < 4.5 to the left, agree=0.970, adj=0.062, (0 split)
## T.offer < 7.5 to the left, agree=0.969, adj=0.042, (0 split)
##
## Node number 129: 78 observations
## predicted class=N expected loss=0.1153846 P(node) =0.01945137
## class counts: 69 9
## probabilities: 0.885 0.115
##
## Node number 130: 25 observations
## predicted class=N expected loss=0 P(node) =0.006234414
## class counts: 25 0
## probabilities: 1.000 0.000
##
## Node number 131: 18 observations
## predicted class=Y expected loss=0.3888889 P(node) =0.004488778
## class counts: 7 11
## probabilities: 0.389 0.611
##
## Node number 240: 1044 observations, complexity param=0.02035491
## predicted class=N expected loss=0.3304598 P(node) =0.2603491
## class counts: 699 345
## probabilities: 0.670 0.330
## left son=480 (153 obs) right son=481 (891 obs)
## Primary splits:
## T.research < 0.5 to the right, improve=33.20425, (0 missing)
## T.onlin < 0.5 to the left, improve=29.71891, (0 missing)
## T.pleas < 0.5 to the right, improve=29.59940, (0 missing)
## T.meet < 0.5 to the right, improve=24.77513, (0 missing)
## T.custom < 0.5 to the left, improve=22.86556, (0 missing)
## Surrogate splits:
## T.group < 1.5 to the right, agree=0.865, adj=0.078, (0 split)
## T.approv < 2.5 to the right, agree=0.861, adj=0.052, (0 split)
## T.request < 3.5 to the right, agree=0.860, adj=0.046, (0 split)
## T.meet < 3.5 to the right, agree=0.860, adj=0.046, (0 split)
## T.engin < 1.5 to the right, agree=0.857, adj=0.026, (0 split)
##
## Node number 241: 68 observations, complexity param=0.001043841
## predicted class=Y expected loss=0.1176471 P(node) =0.01695761
## class counts: 8 60
## probabilities: 0.118 0.882
## left son=482 (9 obs) right son=483 (59 obs)
## Primary splits:
## T.regard < 0.5 to the right, improve=3.978287, (0 missing)
## T.send < 0.5 to the right, improve=2.215575, (0 missing)
## T.can < 0.5 to the right, improve=1.790603, (0 missing)
## T.need < 0.5 to the right, improve=1.588801, (0 missing)
## T.receiv < 0.5 to the right, improve=1.332991, (0 missing)
## Surrogate splits:
## T.trade < 0.5 to the right, agree=0.941, adj=0.556, (0 split)
## T.file < 0.5 to the right, agree=0.926, adj=0.444, (0 split)
## T.power < 0.5 to the right, agree=0.926, adj=0.444, (0 split)
## T.addit < 0.5 to the right, agree=0.912, adj=0.333, (0 split)
## T.support < 0.5 to the right, agree=0.912, adj=0.333, (0 split)
##
## Node number 242: 9 observations
## predicted class=N expected loss=0 P(node) =0.002244389
## class counts: 9 0
## probabilities: 1.000 0.000
##
## Node number 243: 158 observations, complexity param=0.007306889
## predicted class=Y expected loss=0.1392405 P(node) =0.0394015
## class counts: 22 136
## probabilities: 0.139 0.861
## left son=486 (9 obs) right son=487 (149 obs)
## Primary splits:
## T.energi < 0.5 to the right, improve=10.726510, (0 missing)
## T.either < 0.5 to the right, improve= 7.782441, (0 missing)
## T.group < 0.5 to the right, improve= 7.037054, (0 missing)
## T.research < 0.5 to the right, improve= 6.713958, (0 missing)
## T.year < 0.5 to the right, improve= 6.378699, (0 missing)
## Surrogate splits:
## T.servic < 7.5 to the right, agree=0.956, adj=0.222, (0 split)
## T.corpor < 0.5 to the right, agree=0.956, adj=0.222, (0 split)
## T.plan < 3.5 to the right, agree=0.956, adj=0.222, (0 split)
## T.cours < 1.5 to the right, agree=0.956, adj=0.222, (0 split)
## T.respons < 1.5 to the right, agree=0.956, adj=0.222, (0 split)
##
## Node number 256: 1452 observations
## predicted class=N expected loss=0.008264463 P(node) =0.3620948
## class counts: 1440 12
## probabilities: 0.992 0.008
##
## Node number 257: 48 observations, complexity param=0.001043841
## predicted class=N expected loss=0.125 P(node) =0.01197007
## class counts: 42 6
## probabilities: 0.875 0.125
## left son=514 (39 obs) right son=515 (9 obs)
## Primary splits:
## T.num.words < 82 to the right, improve=6.5, (0 missing)
## T.num.words.log < 4.41855 to the right, improve=6.5, (0 missing)
## T.num.chars < 975.5 to the right, improve=5.7, (0 missing)
## T.num.chars.log < 6.883814 to the right, improve=5.7, (0 missing)
## T.subject < 1.5 to the right, improve=5.7, (0 missing)
## Surrogate splits:
## T.num.words.log < 4.41855 to the right, agree=1.000, adj=1.000, (0 split)
## T.num.chars < 907.5 to the right, agree=0.979, adj=0.889, (0 split)
## T.num.chars.log < 6.811303 to the right, agree=0.979, adj=0.889, (0 split)
## T.subject < 1.5 to the right, agree=0.979, adj=0.889, (0 split)
## T.num.words.unq < 67.5 to the right, agree=0.958, adj=0.778, (0 split)
##
## Node number 480: 153 observations
## predicted class=N expected loss=0.02614379 P(node) =0.03815461
## class counts: 149 4
## probabilities: 0.974 0.026
##
## Node number 481: 891 observations, complexity param=0.02035491
## predicted class=N expected loss=0.382716 P(node) =0.2221945
## class counts: 550 341
## probabilities: 0.617 0.383
## left son=962 (838 obs) right son=963 (53 obs)
## Primary splits:
## T.onlin < 0.5 to the left, improve=26.53361, (0 missing)
## T.custom < 0.5 to the left, improve=24.92004, (0 missing)
## T.pleas < 0.5 to the right, improve=24.65708, (0 missing)
## T.attach < 0.5 to the right, improve=22.12437, (0 missing)
## T.meet < 0.5 to the right, improve=20.05601, (0 missing)
## Surrogate splits:
## T.account < 6 to the left, agree=0.942, adj=0.019, (0 split)
## T.develop < 4.5 to the left, agree=0.942, adj=0.019, (0 split)
##
## Node number 482: 9 observations
## predicted class=N expected loss=0.4444444 P(node) =0.002244389
## class counts: 5 4
## probabilities: 0.556 0.444
##
## Node number 483: 59 observations
## predicted class=Y expected loss=0.05084746 P(node) =0.01471322
## class counts: 3 56
## probabilities: 0.051 0.949
##
## Node number 486: 9 observations
## predicted class=N expected loss=0.1111111 P(node) =0.002244389
## class counts: 8 1
## probabilities: 0.889 0.111
##
## Node number 487: 149 observations, complexity param=0.002087683
## predicted class=Y expected loss=0.09395973 P(node) =0.03715711
## class counts: 14 135
## probabilities: 0.094 0.906
## left son=974 (8 obs) right son=975 (141 obs)
## Primary splits:
## T.research < 0.5 to the right, improve=4.768064, (0 missing)
## T.call < 0.5 to the right, improve=3.561664, (0 missing)
## T.well < 0.5 to the right, improve=3.349007, (0 missing)
## T.year < 0.5 to the right, improve=2.718276, (0 missing)
## T.question < 0.5 to the right, improve=2.718276, (0 missing)
## Surrogate splits:
## T.discuss < 0.5 to the right, agree=0.973, adj=0.500, (0 split)
## T.team < 1.5 to the right, agree=0.966, adj=0.375, (0 split)
## T.get < 2.5 to the right, agree=0.960, adj=0.250, (0 split)
## T.term < 0.5 to the right, agree=0.960, adj=0.250, (0 split)
## T.member < 0.5 to the right, agree=0.960, adj=0.250, (0 split)
##
## Node number 514: 39 observations
## predicted class=N expected loss=0 P(node) =0.009725686
## class counts: 39 0
## probabilities: 1.000 0.000
##
## Node number 515: 9 observations
## predicted class=Y expected loss=0.3333333 P(node) =0.002244389
## class counts: 3 6
## probabilities: 0.333 0.667
##
## Node number 962: 838 observations, complexity param=0.02035491
## predicted class=N expected loss=0.3520286 P(node) =0.2089776
## class counts: 543 295
## probabilities: 0.648 0.352
## left son=1924 (227 obs) right son=1925 (611 obs)
## Primary splits:
## T.pleas < 0.5 to the right, improve=24.37265, (0 missing)
## T.custom < 0.5 to the left, improve=22.57384, (0 missing)
## T.com < 0.5 to the left, improve=19.90311, (0 missing)
## T.attach < 0.5 to the right, improve=19.37764, (0 missing)
## T.click < 0.5 to the left, improve=17.94325, (0 missing)
## Surrogate splits:
## T.contact < 0.5 to the right, agree=0.755, adj=0.097, (0 split)
## T.resourc < 0.5 to the right, agree=0.751, adj=0.079, (0 split)
## T.question < 0.5 to the right, agree=0.751, adj=0.079, (0 split)
## T.manag < 1.5 to the right, agree=0.749, adj=0.075, (0 split)
## T.access < 0.5 to the right, agree=0.747, adj=0.066, (0 split)
##
## Node number 963: 53 observations, complexity param=0.002087683
## predicted class=Y expected loss=0.1320755 P(node) =0.01321696
## class counts: 7 46
## probabilities: 0.132 0.868
## left son=1926 (8 obs) right son=1927 (45 obs)
## Primary splits:
## T.num.words.unq < 67 to the right, improve=4.578721, (0 missing)
## T.num.words.unq.log < 4.219075 to the right, improve=4.578721, (0 missing)
## T.num.chars < 512 to the right, improve=3.345083, (0 missing)
## T.num.words < 49.5 to the right, improve=3.345083, (0 missing)
## T.num.words.log < 3.917987 to the right, improve=3.345083, (0 missing)
## Surrogate splits:
## T.num.words.unq.log < 4.219075 to the right, agree=1.000, adj=1.000, (0 split)
## T.num.chars < 1087.5 to the right, agree=0.981, adj=0.875, (0 split)
## T.num.chars.log < 6.992411 to the right, agree=0.981, adj=0.875, (0 split)
## T.num.words < 74 to the right, agree=0.962, adj=0.750, (0 split)
## T.num.words.log < 4.317132 to the right, agree=0.962, adj=0.750, (0 split)
##
## Node number 974: 8 observations
## predicted class=N expected loss=0.375 P(node) =0.001995012
## class counts: 5 3
## probabilities: 0.625 0.375
##
## Node number 975: 141 observations
## predicted class=Y expected loss=0.06382979 P(node) =0.03516209
## class counts: 9 132
## probabilities: 0.064 0.936
##
## Node number 1924: 227 observations, complexity param=0.004175365
## predicted class=N expected loss=0.154185 P(node) =0.05660848
## class counts: 192 35
## probabilities: 0.846 0.154
## left son=3848 (209 obs) right son=3849 (18 obs)
## Primary splits:
## T.account < 0.5 to the left, improve=8.163455, (0 missing)
## T.right < 0.5 to the left, improve=7.631219, (0 missing)
## T.email < 1.5 to the left, improve=6.384407, (0 missing)
## T.program < 1.5 to the left, improve=5.887414, (0 missing)
## T.rate < 1.5 to the left, improve=5.375399, (0 missing)
## Surrogate splits:
## T.may < 2.5 to the left, agree=0.938, adj=0.222, (0 split)
## T.relat < 1.5 to the left, agree=0.938, adj=0.222, (0 split)
## T.posit < 2.5 to the left, agree=0.938, adj=0.222, (0 split)
## T.secur < 4.5 to the left, agree=0.934, adj=0.167, (0 split)
## T.list < 4.5 to the left, agree=0.934, adj=0.167, (0 split)
##
## Node number 1925: 611 observations, complexity param=0.02035491
## predicted class=N expected loss=0.4255319 P(node) =0.1523691
## class counts: 351 260
## probabilities: 0.574 0.426
## left son=3850 (503 obs) right son=3851 (108 obs)
## Primary splits:
## T.com < 0.5 to the left, improve=26.06896, (0 missing)
## T.custom < 0.5 to the left, improve=22.17378, (0 missing)
## T.regard < 0.5 to the right, improve=20.37512, (0 missing)
## T.meet < 0.5 to the right, improve=18.73653, (0 missing)
## T.click < 0.5 to the left, improve=17.13182, (0 missing)
## Surrogate splits:
## T.has.http < 0.5 to the left, agree=0.861, adj=0.213, (0 split)
## T.http < 0.5 to the left, agree=0.861, adj=0.213, (0 split)
## T.www < 0.5 to the left, agree=0.861, adj=0.213, (0 split)
## T.invest < 1.5 to the left, agree=0.830, adj=0.037, (0 split)
## T.site < 0.5 to the left, agree=0.830, adj=0.037, (0 split)
##
## Node number 1926: 8 observations
## predicted class=N expected loss=0.375 P(node) =0.001995012
## class counts: 5 3
## probabilities: 0.625 0.375
##
## Node number 1927: 45 observations
## predicted class=Y expected loss=0.04444444 P(node) =0.01122195
## class counts: 2 43
## probabilities: 0.044 0.956
##
## Node number 3848: 209 observations, complexity param=0.004175365
## predicted class=N expected loss=0.1148325 P(node) =0.0521197
## class counts: 185 24
## probabilities: 0.885 0.115
## left son=7696 (201 obs) right son=7697 (8 obs)
## Primary splits:
## T.program < 1.5 to the left, improve=6.711919, (0 missing)
## T.email < 1.5 to the left, improve=6.256536, (0 missing)
## T.just < 0.5 to the left, improve=5.675204, (0 missing)
## T.mail < 0.5 to the left, improve=5.471470, (0 missing)
## T.click < 0.5 to the left, improve=5.291217, (0 missing)
## Surrogate splits:
## T.sorri < 0.5 to the left, agree=0.971, adj=0.250, (0 split)
## T.return < 1.5 to the left, agree=0.967, adj=0.125, (0 split)
## T.state < 1.5 to the left, agree=0.967, adj=0.125, (0 split)
##
## Node number 3849: 18 observations
## predicted class=Y expected loss=0.3888889 P(node) =0.004488778
## class counts: 7 11
## probabilities: 0.389 0.611
##
## Node number 3850: 503 observations, complexity param=0.02035491
## predicted class=N expected loss=0.3578529 P(node) =0.1254364
## class counts: 323 180
## probabilities: 0.642 0.358
## left son=7700 (466 obs) right son=7701 (37 obs)
## Primary splits:
## T.custom < 0.5 to the left, improve=25.14443, (0 missing)
## T.regard < 0.5 to the right, improve=18.51732, (0 missing)
## T.click < 0.5 to the left, improve=15.74012, (0 missing)
## T.websit < 0.5 to the left, improve=15.21537, (0 missing)
## T.meet < 0.5 to the right, improve=13.30012, (0 missing)
## Surrogate splits:
## T.hello < 0.5 to the left, agree=0.936, adj=0.135, (0 split)
## T.X000 < 0.5 to the left, agree=0.930, adj=0.054, (0 split)
## T.design < 2.5 to the left, agree=0.930, adj=0.054, (0 split)
## T.creat < 2.5 to the left, agree=0.930, adj=0.054, (0 split)
## T.num.words.unq < 229.5 to the left, agree=0.930, adj=0.054, (0 split)
##
## Node number 3851: 108 observations, complexity param=0.004175365
## predicted class=Y expected loss=0.2592593 P(node) =0.02693267
## class counts: 28 80
## probabilities: 0.259 0.741
## left son=7702 (14 obs) right son=7703 (94 obs)
## Primary splits:
## T.will < 0.5 to the right, improve=4.733761, (0 missing)
## T.num.chars < 403 to the right, improve=3.413370, (0 missing)
## T.num.chars.log < 6.001412 to the right, improve=3.413370, (0 missing)
## T.also < 0.5 to the right, improve=3.259259, (0 missing)
## T.dear < 0.5 to the right, improve=3.259259, (0 missing)
## Surrogate splits:
## T.custom < 2 to the right, agree=0.889, adj=0.143, (0 split)
## T.list < 1.5 to the right, agree=0.889, adj=0.143, (0 split)
## T.line < 0.5 to the right, agree=0.889, adj=0.143, (0 split)
## T.info < 2.5 to the right, agree=0.889, adj=0.143, (0 split)
## T.name < 2 to the right, agree=0.889, adj=0.143, (0 split)
##
## Node number 7696: 201 observations, complexity param=0.001043841
## predicted class=N expected loss=0.08955224 P(node) =0.05012469
## class counts: 183 18
## probabilities: 0.910 0.090
## left son=15392 (188 obs) right son=15393 (13 obs)
## Primary splits:
## T.email < 1.5 to the left, improve=5.601815, (0 missing)
## T.just < 0.5 to the left, improve=4.281092, (0 missing)
## T.click < 0.5 to the left, improve=3.982469, (0 missing)
## T.visit < 0.5 to the left, improve=2.832876, (0 missing)
## T.mail < 0.5 to the left, improve=2.380594, (0 missing)
## Surrogate splits:
## T.receiv < 3.5 to the left, agree=0.945, adj=0.154, (0 split)
## T.per < 0.5 to the left, agree=0.945, adj=0.154, (0 split)
## T.real < 2 to the left, agree=0.945, adj=0.154, (0 split)
## T.wish < 0.5 to the left, agree=0.940, adj=0.077, (0 split)
## T.X000 < 0.5 to the left, agree=0.940, adj=0.077, (0 split)
##
## Node number 7697: 8 observations
## predicted class=Y expected loss=0.25 P(node) =0.001995012
## class counts: 2 6
## probabilities: 0.250 0.750
##
## Node number 7700: 466 observations, complexity param=0.02035491
## predicted class=N expected loss=0.3133047 P(node) =0.1162095
## class counts: 320 146
## probabilities: 0.687 0.313
## left son=15400 (442 obs) right son=15401 (24 obs)
## Primary splits:
## T.click < 0.5 to the left, improve=18.42302, (0 missing)
## T.websit < 0.5 to the left, improve=14.97257, (0 missing)
## T.full < 0.5 to the left, improve=14.65670, (0 missing)
## T.regard < 0.5 to the right, improve=14.19770, (0 missing)
## T.compani < 2.5 to the left, improve=13.91106, (0 missing)
## Surrogate splits:
## T.home < 2.5 to the left, agree=0.953, adj=0.083, (0 split)
## T.get < 2.5 to the left, agree=0.951, adj=0.042, (0 split)
##
## Node number 7701: 37 observations
## predicted class=Y expected loss=0.08108108 P(node) =0.009226933
## class counts: 3 34
## probabilities: 0.081 0.919
##
## Node number 7702: 14 observations
## predicted class=N expected loss=0.3571429 P(node) =0.003491272
## class counts: 9 5
## probabilities: 0.643 0.357
##
## Node number 7703: 94 observations, complexity param=0.001043841
## predicted class=Y expected loss=0.2021277 P(node) =0.0234414
## class counts: 19 75
## probabilities: 0.202 0.798
## left son=15406 (11 obs) right son=15407 (83 obs)
## Primary splits:
## T.power < 0.5 to the right, improve=2.936893, (0 missing)
## T.manag < 0.5 to the right, improve=2.062991, (0 missing)
## T.num.chars < 183 to the left, improve=2.030450, (0 missing)
## T.num.chars.log < 5.214877 to the left, improve=2.030450, (0 missing)
## T.messag < 0.5 to the left, improve=1.945816, (0 missing)
## Surrogate splits:
## T.fax < 0.5 to the right, agree=0.915, adj=0.273, (0 split)
## T.gas < 0.5 to the right, agree=0.894, adj=0.091, (0 split)
## T.book < 0.5 to the right, agree=0.894, adj=0.091, (0 split)
## T.manag < 0.5 to the right, agree=0.894, adj=0.091, (0 split)
## T.let < 0.5 to the right, agree=0.894, adj=0.091, (0 split)
##
## Node number 15392: 188 observations
## predicted class=N expected loss=0.05851064 P(node) =0.04688279
## class counts: 177 11
## probabilities: 0.941 0.059
##
## Node number 15393: 13 observations
## predicted class=Y expected loss=0.4615385 P(node) =0.003241895
## class counts: 6 7
## probabilities: 0.462 0.538
##
## Node number 15400: 442 observations, complexity param=0.01878914
## predicted class=N expected loss=0.280543 P(node) =0.1102244
## class counts: 318 124
## probabilities: 0.719 0.281
## left son=30800 (420 obs) right son=30801 (22 obs)
## Primary splits:
## T.websit < 0.5 to the left, improve=18.29374, (0 missing)
## T.compani < 2.5 to the left, improve=15.43240, (0 missing)
## T.market < 3.5 to the left, improve=15.43240, (0 missing)
## T.provid < 1.5 to the left, improve=14.97878, (0 missing)
## T.full < 0.5 to the left, improve=14.44194, (0 missing)
## Surrogate splits:
## T.compani < 2.5 to the left, agree=0.984, adj=0.682, (0 split)
## T.provid < 1.5 to the left, agree=0.984, adj=0.682, (0 split)
## T.market < 3.5 to the left, agree=0.984, adj=0.682, (0 split)
## T.much < 1.5 to the left, agree=0.982, adj=0.636, (0 split)
## T.effect < 1.5 to the left, agree=0.980, adj=0.591, (0 split)
##
## Node number 15401: 24 observations
## predicted class=Y expected loss=0.08333333 P(node) =0.005985037
## class counts: 2 22
## probabilities: 0.083 0.917
##
## Node number 15406: 11 observations
## predicted class=N expected loss=0.4545455 P(node) =0.002743142
## class counts: 6 5
## probabilities: 0.545 0.455
##
## Node number 15407: 83 observations
## predicted class=Y expected loss=0.1566265 P(node) =0.02069825
## class counts: 13 70
## probabilities: 0.157 0.843
##
## Node number 30800: 420 observations, complexity param=0.006784969
## predicted class=N expected loss=0.247619 P(node) =0.1047382
## class counts: 316 104
## probabilities: 0.752 0.248
## left son=61600 (125 obs) right son=61601 (295 obs)
## Primary splits:
## T.will < 0.5 to the right, improve=10.000320, (0 missing)
## T.num.chars < 480.5 to the right, improve= 9.913112, (0 missing)
## T.num.chars.log < 6.176906 to the right, improve= 9.913112, (0 missing)
## T.num.words < 44.5 to the right, improve= 9.205959, (0 missing)
## T.num.words.log < 3.817652 to the right, improve= 9.205959, (0 missing)
## Surrogate splits:
## T.num.chars < 676 to the right, agree=0.776, adj=0.248, (0 split)
## T.num.chars.log < 6.517661 to the right, agree=0.776, adj=0.248, (0 split)
## T.num.words < 56.5 to the right, agree=0.767, adj=0.216, (0 split)
## T.num.words.log < 4.051747 to the right, agree=0.767, adj=0.216, (0 split)
## T.num.words.unq < 46.5 to the right, agree=0.764, adj=0.208, (0 split)
##
## Node number 30801: 22 observations
## predicted class=Y expected loss=0.09090909 P(node) =0.005486284
## class counts: 2 20
## probabilities: 0.091 0.909
##
## Node number 61600: 125 observations
## predicted class=N expected loss=0.08 P(node) =0.03117207
## class counts: 115 10
## probabilities: 0.920 0.080
##
## Node number 61601: 295 observations, complexity param=0.006784969
## predicted class=N expected loss=0.3186441 P(node) =0.07356608
## class counts: 201 94
## probabilities: 0.681 0.319
## left son=123202 (280 obs) right son=123203 (15 obs)
## Primary splits:
## T.softwar < 0.5 to the left, improve=11.942530, (0 missing)
## T.messag < 0.5 to the left, improve= 7.199393, (0 missing)
## T.regard < 0.5 to the right, improve= 6.178422, (0 missing)
## T.net < 0.5 to the left, improve= 6.037417, (0 missing)
## T.number < 0.5 to the right, improve= 6.035214, (0 missing)
##
## Node number 123202: 280 observations, complexity param=0.006784969
## predicted class=N expected loss=0.2857143 P(node) =0.06982544
## class counts: 200 80
## probabilities: 0.714 0.286
## left son=246404 (253 obs) right son=246405 (27 obs)
## Primary splits:
## T.messag < 0.5 to the left, improve=8.673066, (0 missing)
## T.net < 0.5 to the left, improve=5.719538, (0 missing)
## T.mail < 0.5 to the left, improve=5.045045, (0 missing)
## T.regard < 0.5 to the right, improve=4.880733, (0 missing)
## T.number < 0.5 to the right, improve=4.878600, (0 missing)
## Surrogate splits:
## T.mail < 0.5 to the left, agree=0.929, adj=0.259, (0 split)
## T.return < 0.5 to the left, agree=0.921, adj=0.185, (0 split)
## T.follow < 1.5 to the left, agree=0.921, adj=0.185, (0 split)
## T.net < 1.5 to the left, agree=0.914, adj=0.111, (0 split)
## T.address < 0.5 to the left, agree=0.914, adj=0.111, (0 split)
##
## Node number 123203: 15 observations
## predicted class=Y expected loss=0.06666667 P(node) =0.003740648
## class counts: 1 14
## probabilities: 0.067 0.933
##
## Node number 246404: 253 observations, complexity param=0.005219207
## predicted class=N expected loss=0.2450593 P(node) =0.06309227
## class counts: 191 62
## probabilities: 0.755 0.245
## left son=492808 (242 obs) right son=492809 (11 obs)
## Primary splits:
## T.free < 0.5 to the left, improve=5.348185, (0 missing)
## T.now < 0.5 to the left, improve=4.690353, (0 missing)
## T.regard < 0.5 to the right, improve=4.087985, (0 missing)
## T.number < 0.5 to the right, improve=3.331946, (0 missing)
## T.attach < 0.5 to the right, improve=2.608356, (0 missing)
## Surrogate splits:
## T.sure < 0.5 to the left, agree=0.968, adj=0.273, (0 split)
## T.site < 0.5 to the left, agree=0.964, adj=0.182, (0 split)
## T.home < 1.5 to the left, agree=0.960, adj=0.091, (0 split)
##
## Node number 246405: 27 observations, complexity param=0.003131524
## predicted class=Y expected loss=0.3333333 P(node) =0.006733167
## class counts: 9 18
## probabilities: 0.333 0.667
## left son=492810 (13 obs) right son=492811 (14 obs)
## Primary splits:
## T.mail < 0.5 to the left, improve=3.989011, (0 missing)
## T.num.words.unq < 35.5 to the right, improve=2.742857, (0 missing)
## T.num.chars < 475 to the right, improve=2.742857, (0 missing)
## T.num.words < 42.5 to the right, improve=2.742857, (0 missing)
## T.num.words.unq.log < 3.597218 to the right, improve=2.742857, (0 missing)
## Surrogate splits:
## T.receiv < 0.5 to the left, agree=0.741, adj=0.462, (0 split)
## T.address < 0.5 to the left, agree=0.741, adj=0.462, (0 split)
## T.return < 0.5 to the left, agree=0.741, adj=0.462, (0 split)
## T.follow < 1.5 to the left, agree=0.741, adj=0.462, (0 split)
## T.see < 0.5 to the left, agree=0.704, adj=0.385, (0 split)
##
## Node number 492808: 242 observations, complexity param=0.003131524
## predicted class=N expected loss=0.2231405 P(node) =0.06034913
## class counts: 188 54
## probabilities: 0.777 0.223
## left son=985616 (231 obs) right son=985617 (11 obs)
## Primary splits:
## T.now < 0.5 to the left, improve=3.935458, (0 missing)
## T.regard < 0.5 to the right, improve=3.281108, (0 missing)
## T.number < 0.5 to the right, improve=2.776402, (0 missing)
## T.one < 0.5 to the left, improve=2.559764, (0 missing)
## T.thing < 0.5 to the left, improve=2.278019, (0 missing)
## Surrogate splits:
## T.futur < 1.5 to the left, agree=0.963, adj=0.182, (0 split)
## T.use < 2 to the left, agree=0.963, adj=0.182, (0 split)
##
## Node number 492809: 11 observations
## predicted class=Y expected loss=0.2727273 P(node) =0.002743142
## class counts: 3 8
## probabilities: 0.273 0.727
##
## Node number 492810: 13 observations
## predicted class=N expected loss=0.3846154 P(node) =0.003241895
## class counts: 8 5
## probabilities: 0.615 0.385
##
## Node number 492811: 14 observations
## predicted class=Y expected loss=0.07142857 P(node) =0.003491272
## class counts: 1 13
## probabilities: 0.071 0.929
##
## Node number 985616: 231 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.2034632 P(node) =0.05760599
## class counts: 184 47
## probabilities: 0.797 0.203
## left son=1971232 (217 obs) right son=1971233 (14 obs)
## Primary splits:
## T.thing < 0.5 to the left, improve=2.621003, (0 missing)
## T.regard < 0.5 to the right, improve=2.531322, (0 missing)
## T.number < 0.5 to the right, improve=2.321061, (0 missing)
## T.one < 0.5 to the left, improve=1.836648, (0 missing)
## T.attach < 0.5 to the right, improve=1.812848, (0 missing)
##
## Node number 985617: 11 observations
## predicted class=Y expected loss=0.3636364 P(node) =0.002743142
## class counts: 4 7
## probabilities: 0.364 0.636
##
## Node number 1971232: 217 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.1843318 P(node) =0.05411471
## class counts: 177 40
## probabilities: 0.816 0.184
## left son=3942464 (26 obs) right son=3942465 (191 obs)
## Primary splits:
## T.regard < 0.5 to the right, improve=2.007383, (0 missing)
## T.number < 0.5 to the right, improve=1.833767, (0 missing)
## T.attach < 0.5 to the right, improve=1.415072, (0 missing)
## T.has.http < 0.5 to the left, improve=1.371342, (0 missing)
## T.http < 0.5 to the left, improve=1.371342, (0 missing)
## Surrogate splits:
## T.write < 0.5 to the right, agree=0.889, adj=0.077, (0 split)
## T.short < 0.5 to the right, agree=0.889, adj=0.077, (0 split)
## T.approv < 0.5 to the right, agree=0.889, adj=0.077, (0 split)
## T.posit < 1.5 to the right, agree=0.889, adj=0.077, (0 split)
## T.contact < 0.5 to the right, agree=0.889, adj=0.077, (0 split)
##
## Node number 1971233: 14 observations
## predicted class=N expected loss=0.5 P(node) =0.003491272
## class counts: 7 7
## probabilities: 0.500 0.500
##
## Node number 3942464: 26 observations
## predicted class=N expected loss=0 P(node) =0.006483791
## class counts: 26 0
## probabilities: 1.000 0.000
##
## Node number 3942465: 191 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.2094241 P(node) =0.04763092
## class counts: 151 40
## probabilities: 0.791 0.209
## left son=7884930 (23 obs) right son=7884931 (168 obs)
## Primary splits:
## T.number < 0.5 to the right, improve=2.293692, (0 missing)
## T.attach < 0.5 to the right, improve=1.427891, (0 missing)
## T.one < 0.5 to the left, improve=1.388370, (0 missing)
## T.model < 0.5 to the right, improve=1.235429, (0 missing)
## T.well < 0.5 to the right, improve=1.223601, (0 missing)
## Surrogate splits:
## T.order < 1.5 to the right, agree=0.921, adj=0.348, (0 split)
## T.confirm < 1.5 to the right, agree=0.921, adj=0.348, (0 split)
## T.place < 0.5 to the right, agree=0.916, adj=0.304, (0 split)
## T.use < 0.5 to the right, agree=0.901, adj=0.174, (0 split)
## T.request < 0.5 to the right, agree=0.895, adj=0.130, (0 split)
##
## Node number 7884930: 23 observations
## predicted class=N expected loss=0 P(node) =0.005735661
## class counts: 23 0
## probabilities: 1.000 0.000
##
## Node number 7884931: 168 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.2380952 P(node) =0.04189526
## class counts: 128 40
## probabilities: 0.762 0.238
## left son=15769862 (15 obs) right son=15769863 (153 obs)
## Primary splits:
## T.attach < 0.5 to the right, improve=1.867414, (0 missing)
## T.well < 0.5 to the right, improve=1.465201, (0 missing)
## T.may < 0.5 to the right, improve=1.334547, (0 missing)
## T.model < 0.5 to the right, improve=1.215723, (0 missing)
## T.trade < 0.5 to the right, improve=1.205546, (0 missing)
## Surrogate splits:
## T.rate < 0.5 to the right, agree=0.917, adj=0.067, (0 split)
## T.sever < 0.5 to the right, agree=0.917, adj=0.067, (0 split)
## T.note < 0.5 to the right, agree=0.917, adj=0.067, (0 split)
##
## Node number 15769862: 15 observations
## predicted class=N expected loss=0 P(node) =0.003740648
## class counts: 15 0
## probabilities: 1.000 0.000
##
## Node number 15769863: 153 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.2614379 P(node) =0.03815461
## class counts: 113 40
## probabilities: 0.739 0.261
## left son=31539726 (136 obs) right son=31539727 (17 obs)
## Primary splits:
## T.one < 0.5 to the left, improve=1.673203, (0 missing)
## T.well < 0.5 to the right, improve=1.620179, (0 missing)
## T.num.words.unq < 18.5 to the left, improve=1.333918, (0 missing)
## T.num.words.unq.log < 2.970086 to the left, improve=1.333918, (0 missing)
## T.may < 0.5 to the right, improve=1.307190, (0 missing)
## Surrogate splits:
## T.peopl < 0.5 to the left, agree=0.902, adj=0.118, (0 split)
## T.member < 0.5 to the left, agree=0.902, adj=0.118, (0 split)
## T.london < 0.5 to the left, agree=0.902, adj=0.118, (0 split)
## T.involv < 0.5 to the left, agree=0.895, adj=0.059, (0 split)
## T.increas < 0.5 to the left, agree=0.895, adj=0.059, (0 split)
##
## Node number 31539726: 136 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.2352941 P(node) =0.03391521
## class counts: 104 32
## probabilities: 0.765 0.235
## left son=63079452 (126 obs) right son=63079453 (10 obs)
## Primary splits:
## T.num.words.unq < 56 to the left, improve=1.512605, (0 missing)
## T.num.words.unq.log < 4.042435 to the left, improve=1.512605, (0 missing)
## T.well < 0.5 to the right, improve=1.325176, (0 missing)
## T.num.chars < 186.5 to the left, improve=1.235789, (0 missing)
## T.num.chars.log < 5.233747 to the left, improve=1.235789, (0 missing)
## Surrogate splits:
## T.num.words.unq.log < 4.042435 to the left, agree=1.000, adj=1.0, (0 split)
## T.num.chars < 765 to the left, agree=0.985, adj=0.8, (0 split)
## T.num.words < 76.5 to the left, agree=0.985, adj=0.8, (0 split)
## T.num.words.log < 4.344227 to the left, agree=0.985, adj=0.8, (0 split)
## T.num.chars.log < 6.641015 to the left, agree=0.985, adj=0.8, (0 split)
##
## Node number 31539727: 17 observations
## predicted class=N expected loss=0.4705882 P(node) =0.004239401
## class counts: 9 8
## probabilities: 0.529 0.471
##
## Node number 63079452: 126 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.2142857 P(node) =0.03142145
## class counts: 99 27
## probabilities: 0.786 0.214
## left son=126158904 (22 obs) right son=126158905 (104 obs)
## Primary splits:
## T.num.chars < 393.5 to the right, improve=2.447802, (0 missing)
## T.num.chars.log < 5.977618 to the right, improve=2.447802, (0 missing)
## T.num.words < 43.5 to the right, improve=2.054740, (0 missing)
## T.num.words.log < 3.795426 to the right, improve=2.054740, (0 missing)
## T.price < 0.5 to the left, improve=1.174517, (0 missing)
## Surrogate splits:
## T.num.chars.log < 5.977618 to the right, agree=1.000, adj=1.000, (0 split)
## T.num.words.unq < 32 to the right, agree=0.976, adj=0.864, (0 split)
## T.num.words < 40.5 to the right, agree=0.976, adj=0.864, (0 split)
## T.num.words.unq.log < 3.496048 to the right, agree=0.976, adj=0.864, (0 split)
## T.num.words.log < 3.725621 to the right, agree=0.976, adj=0.864, (0 split)
##
## Node number 63079453: 10 observations
## predicted class=N expected loss=0.5 P(node) =0.002493766
## class counts: 5 5
## probabilities: 0.500 0.500
##
## Node number 126158904: 22 observations
## predicted class=N expected loss=0 P(node) =0.005486284
## class counts: 22 0
## probabilities: 1.000 0.000
##
## Node number 126158905: 104 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.2596154 P(node) =0.02593516
## class counts: 77 27
## probabilities: 0.740 0.260
## left son=252317810 (94 obs) right son=252317811 (10 obs)
## Primary splits:
## T.price < 0.5 to the left, improve=2.563748, (0 missing)
## T.num.chars < 186.5 to the left, improve=2.552726, (0 missing)
## T.num.chars.log < 5.233747 to the left, improve=2.552726, (0 missing)
## T.num.words < 20.5 to the left, improve=2.309844, (0 missing)
## T.num.words.log < 3.067782 to the left, improve=2.309844, (0 missing)
## Surrogate splits:
## T.month < 0.5 to the left, agree=0.923, adj=0.2, (0 split)
## T.gas < 0.5 to the left, agree=0.923, adj=0.2, (0 split)
## T.avail < 0.5 to the left, agree=0.913, adj=0.1, (0 split)
## T.open < 0.5 to the left, agree=0.913, adj=0.1, (0 split)
##
## Node number 252317810: 94 observations, complexity param=0.0002609603
## predicted class=N expected loss=0.2234043 P(node) =0.0234414
## class counts: 73 21
## probabilities: 0.777 0.223
## left son=504635620 (87 obs) right son=504635621 (7 obs)
## Primary splits:
## T.num.words.unq < 28.5 to the left, improve=1.832128, (0 missing)
## T.num.words.unq.log < 3.384247 to the left, improve=1.832128, (0 missing)
## T.num.words < 20.5 to the left, improve=1.405328, (0 missing)
## T.num.words.log < 3.067782 to the left, improve=1.405328, (0 missing)
## T.num.chars < 186.5 to the left, improve=1.276018, (0 missing)
## Surrogate splits:
## T.num.words.unq.log < 3.384247 to the left, agree=1.000, adj=1.000, (0 split)
## T.num.words < 31 to the left, agree=0.968, adj=0.571, (0 split)
## T.num.words.log < 3.465247 to the left, agree=0.968, adj=0.571, (0 split)
## T.num.chars < 343 to the left, agree=0.957, adj=0.429, (0 split)
## T.num.chars.log < 5.840625 to the left, agree=0.957, adj=0.429, (0 split)
##
## Node number 252317811: 10 observations
## predicted class=Y expected loss=0.4 P(node) =0.002493766
## class counts: 4 6
## probabilities: 0.400 0.600
##
## Node number 504635620: 87 observations
## predicted class=N expected loss=0.1954023 P(node) =0.02169576
## class counts: 70 17
## probabilities: 0.805 0.195
##
## Node number 504635621: 7 observations
## predicted class=Y expected loss=0.4285714 P(node) =0.001745636
## class counts: 3 4
## probabilities: 0.429 0.571
##
## n= 4010
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 4010 958 N (0.761097257 0.238902743)
## 2) T.thank>=0.5 1704 96 N (0.943661972 0.056338028)
## 4) T.remov< 0.5 1655 66 N (0.960120846 0.039879154)
## 8) T.account< 3.5 1638 52 N (0.968253968 0.031746032)
## 16) T.money< 2.5 1630 44 N (0.973006135 0.026993865)
## 32) T.compani< 7.5 1621 38 N (0.976557680 0.023442320)
## 64) T.money< 0.5 1578 27 N (0.982889734 0.017110266)
## 128) T.link< 0.5 1500 18 N (0.988000000 0.012000000)
## 256) T.net< 0.5 1452 12 N (0.991735537 0.008264463) *
## 257) T.net>=0.5 48 6 N (0.875000000 0.125000000)
## 514) T.num.words>=82 39 0 N (1.000000000 0.000000000) *
## 515) T.num.words< 82 9 3 Y (0.333333333 0.666666667) *
## 129) T.link>=0.5 78 9 N (0.884615385 0.115384615) *
## 65) T.money>=0.5 43 11 N (0.744186047 0.255813953)
## 130) T.subject>=1.5 25 0 N (1.000000000 0.000000000) *
## 131) T.subject< 1.5 18 7 Y (0.388888889 0.611111111) *
## 33) T.compani>=7.5 9 3 Y (0.333333333 0.666666667) *
## 17) T.money>=2.5 8 0 Y (0.000000000 1.000000000) *
## 9) T.account>=3.5 17 3 Y (0.176470588 0.823529412) *
## 5) T.remov>=0.5 49 19 Y (0.387755102 0.612244898)
## 10) T.receiv< 0.5 25 8 N (0.680000000 0.320000000)
## 20) T.let>=0.5 12 0 N (1.000000000 0.000000000) *
## 21) T.let< 0.5 13 5 Y (0.384615385 0.615384615) *
## 11) T.receiv>=0.5 24 2 Y (0.083333333 0.916666667) *
## 3) T.thank< 0.5 2306 862 N (0.626192541 0.373807459)
## 6) T.X2000>=0.5 473 13 N (0.972515856 0.027484144)
## 12) T.remov< 0.5 450 5 N (0.988888889 0.011111111) *
## 13) T.remov>=0.5 23 8 N (0.652173913 0.347826087)
## 26) T.forward>=0.5 9 0 N (1.000000000 0.000000000) *
## 27) T.forward< 0.5 14 6 Y (0.428571429 0.571428571) *
## 7) T.X2000< 0.5 1833 849 N (0.536824877 0.463175123)
## 14) T.X2001>=0.5 246 15 N (0.939024390 0.060975610)
## 28) T.secur< 2.5 239 8 N (0.966527197 0.033472803)
## 56) T.net< 0.5 232 4 N (0.982758621 0.017241379) *
## 57) T.net>=0.5 7 3 Y (0.428571429 0.571428571) *
## 29) T.secur>=2.5 7 0 Y (0.000000000 1.000000000) *
## 15) T.X2001< 0.5 1587 753 Y (0.474480151 0.525519849)
## 30) T.life< 0.5 1428 678 N (0.525210084 0.474789916)
## 60) T.money< 0.5 1279 541 N (0.577013292 0.422986708)
## 120) T.offer< 0.5 1112 405 N (0.635791367 0.364208633)
## 240) T.remov< 0.5 1044 345 N (0.669540230 0.330459770)
## 480) T.research>=0.5 153 4 N (0.973856209 0.026143791) *
## 481) T.research< 0.5 891 341 N (0.617283951 0.382716049)
## 962) T.onlin< 0.5 838 295 N (0.647971360 0.352028640)
## 1924) T.pleas>=0.5 227 35 N (0.845814978 0.154185022)
## 3848) T.account< 0.5 209 24 N (0.885167464 0.114832536)
## 7696) T.program< 1.5 201 18 N (0.910447761 0.089552239)
## 15392) T.email< 1.5 188 11 N (0.941489362 0.058510638) *
## 15393) T.email>=1.5 13 6 Y (0.461538462 0.538461538) *
## 7697) T.program>=1.5 8 2 Y (0.250000000 0.750000000) *
## 3849) T.account>=0.5 18 7 Y (0.388888889 0.611111111) *
## 1925) T.pleas< 0.5 611 260 N (0.574468085 0.425531915)
## 3850) T.com< 0.5 503 180 N (0.642147117 0.357852883)
## 7700) T.custom< 0.5 466 146 N (0.686695279 0.313304721)
## 15400) T.click< 0.5 442 124 N (0.719457014 0.280542986)
## 30800) T.websit< 0.5 420 104 N (0.752380952 0.247619048)
## 61600) T.will>=0.5 125 10 N (0.920000000 0.080000000) *
## 61601) T.will< 0.5 295 94 N (0.681355932 0.318644068)
## 123202) T.softwar< 0.5 280 80 N (0.714285714 0.285714286)
## 246404) T.messag< 0.5 253 62 N (0.754940711 0.245059289)
## 492808) T.free< 0.5 242 54 N (0.776859504 0.223140496)
## 985616) T.now< 0.5 231 47 N (0.796536797 0.203463203)
## 1971232) T.thing< 0.5 217 40 N (0.815668203 0.184331797)
## 3942464) T.regard>=0.5 26 0 N (1.000000000 0.000000000) *
## 3942465) T.regard< 0.5 191 40 N (0.790575916 0.209424084)
## 7884930) T.number>=0.5 23 0 N (1.000000000 0.000000000) *
## 7884931) T.number< 0.5 168 40 N (0.761904762 0.238095238)
## 15769862) T.attach>=0.5 15 0 N (1.000000000 0.000000000) *
## 15769863) T.attach< 0.5 153 40 N (0.738562092 0.261437908)
## 31539726) T.one< 0.5 136 32 N (0.764705882 0.235294118)
## 63079452) T.num.words.unq< 56 126 27 N (0.785714286 0.214285714)
## 126158904) T.num.chars>=393.5 22 0 N (1.000000000 0.000000000) *
## 126158905) T.num.chars< 393.5 104 27 N (0.740384615 0.259615385)
## 252317810) T.price< 0.5 94 21 N (0.776595745 0.223404255)
## 504635620) T.num.words.unq< 28.5 87 17 N (0.804597701 0.195402299) *
## 504635621) T.num.words.unq>=28.5 7 3 Y (0.428571429 0.571428571) *
## 252317811) T.price>=0.5 10 4 Y (0.400000000 0.600000000) *
## 63079453) T.num.words.unq>=56 10 5 N (0.500000000 0.500000000) *
## 31539727) T.one>=0.5 17 8 N (0.529411765 0.470588235) *
## 1971233) T.thing>=0.5 14 7 N (0.500000000 0.500000000) *
## 985617) T.now>=0.5 11 4 Y (0.363636364 0.636363636) *
## 492809) T.free>=0.5 11 3 Y (0.272727273 0.727272727) *
## 246405) T.messag>=0.5 27 9 Y (0.333333333 0.666666667)
## 492810) T.mail< 0.5 13 5 N (0.615384615 0.384615385) *
## 492811) T.mail>=0.5 14 1 Y (0.071428571 0.928571429) *
## 123203) T.softwar>=0.5 15 1 Y (0.066666667 0.933333333) *
## 30801) T.websit>=0.5 22 2 Y (0.090909091 0.909090909) *
## 15401) T.click>=0.5 24 2 Y (0.083333333 0.916666667) *
## 7701) T.custom>=0.5 37 3 Y (0.081081081 0.918918919) *
## 3851) T.com>=0.5 108 28 Y (0.259259259 0.740740741)
## 7702) T.will>=0.5 14 5 N (0.642857143 0.357142857) *
## 7703) T.will< 0.5 94 19 Y (0.202127660 0.797872340)
## 15406) T.power>=0.5 11 5 N (0.545454545 0.454545455) *
## 15407) T.power< 0.5 83 13 Y (0.156626506 0.843373494) *
## 963) T.onlin>=0.5 53 7 Y (0.132075472 0.867924528)
## 1926) T.num.words.unq>=67 8 3 N (0.625000000 0.375000000) *
## 1927) T.num.words.unq< 67 45 2 Y (0.044444444 0.955555556) *
## 241) T.remov>=0.5 68 8 Y (0.117647059 0.882352941)
## 482) T.regard>=0.5 9 4 N (0.555555556 0.444444444) *
## 483) T.regard< 0.5 59 3 Y (0.050847458 0.949152542) *
## 121) T.offer>=0.5 167 31 Y (0.185628743 0.814371257)
## 242) T.model>=0.5 9 0 N (1.000000000 0.000000000) *
## 243) T.model< 0.5 158 22 Y (0.139240506 0.860759494)
## 486) T.energi>=0.5 9 1 N (0.888888889 0.111111111) *
## 487) T.energi< 0.5 149 14 Y (0.093959732 0.906040268)
## 974) T.research>=0.5 8 3 N (0.625000000 0.375000000) *
## 975) T.research< 0.5 141 9 Y (0.063829787 0.936170213) *
## 61) T.money>=0.5 149 12 Y (0.080536913 0.919463087)
## 122) T.ask>=0.5 9 3 N (0.666666667 0.333333333) *
## 123) T.ask< 0.5 140 6 Y (0.042857143 0.957142857) *
## 31) T.life>=0.5 159 3 Y (0.018867925 0.981132075) *
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 2734 46
## Y 318 912
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2734
## 2 Y 46
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 318
## 2 912
## Reference
## Prediction N Y
## N 2881 73
## Y 171 885
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2881
## 2 Y 73
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 171
## 2 885
## Reference
## Prediction N Y
## N 2881 73
## Y 171 885
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2881
## 2 Y 73
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 171
## 2 885
## Reference
## Prediction N Y
## N 2914 92
## Y 138 866
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2914
## 2 Y 92
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 138
## 2 866
## Reference
## Prediction N Y
## N 2934 109
## Y 118 849
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2934
## 2 Y 109
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 118
## 2 849
## Reference
## Prediction N Y
## N 2968 150
## Y 84 808
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2968
## 2 Y 150
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 84
## 2 808
## Reference
## Prediction N Y
## N 2997 199
## Y 55 759
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2997
## 2 Y 199
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 55
## 2 759
## Reference
## Prediction N Y
## N 3002 213
## Y 50 745
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 3002
## 2 Y 213
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 50
## 2 745
## Reference
## Prediction N Y
## N 3018 297
## Y 34 661
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 3018
## 2 Y 297
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 34
## 2 661
## Reference
## Prediction N Y
## N 3052 943
## Y 0 15
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 3052
## 2 Y 943
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 0
## 2 15
## threshold f.score
## 1 0.0 0.38566828
## 2 0.1 0.83363803
## 3 0.2 0.87884806
## 4 0.3 0.87884806
## 5 0.4 0.88277268
## 6 0.5 0.88207792
## 7 0.6 0.87351351
## 8 0.7 0.85665914
## 9 0.8 0.84997148
## 10 0.9 0.79975802
## 11 1.0 0.03083248
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2914
## 2 Y 92
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 138
## 2 866
## Reference
## Prediction N Y
## N 2914 92
## Y 138 866
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 2914
## 2 Y 92
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 138
## 2 866
## Prediction
## Reference N Y
## N 2914 138
## Y 92 866
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.426434e-01 8.448340e-01 9.349933e-01 9.496402e-01 7.610973e-01
## AccuracyPValue McnemarPValue
## 7.134035e-211 3.005135e-03
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 1160 40
## Y 148 370
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1160
## 2 Y 40
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 148
## 2 370
## Reference
## Prediction N Y
## N 1225 58
## Y 83 352
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1225
## 2 Y 58
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 83
## 2 352
## Reference
## Prediction N Y
## N 1225 58
## Y 83 352
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1225
## 2 Y 58
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 83
## 2 352
## Reference
## Prediction N Y
## N 1236 72
## Y 72 338
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1236
## 2 Y 72
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 72
## 2 338
## Reference
## Prediction N Y
## N 1240 81
## Y 68 329
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1240
## 2 Y 81
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 68
## 2 329
## Reference
## Prediction N Y
## N 1258 94
## Y 50 316
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1258
## 2 Y 94
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 50
## 2 316
## Reference
## Prediction N Y
## N 1271 121
## Y 37 289
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1271
## 2 Y 121
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 37
## 2 289
## Reference
## Prediction N Y
## N 1274 129
## Y 34 281
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1274
## 2 Y 129
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 34
## 2 281
## Reference
## Prediction N Y
## N 1281 160
## Y 27 250
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1281
## 2 Y 160
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 27
## 2 250
## Reference
## Prediction N Y
## N 1308 410
## Y 0 0
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1308
## 2 Y 410
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 0
## 2 0
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.7974138
## 3 0.2 0.8331361
## 4 0.3 0.8331361
## 5 0.4 0.8243902
## 6 0.5 0.8153656
## 7 0.6 0.8144330
## 8 0.7 0.7853261
## 9 0.8 0.7751724
## 10 0.9 0.7278020
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.3000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1225
## 2 Y 58
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 83
## 2 352
## Reference
## Prediction N Y
## N 1225 58
## Y 83 352
## spam.fctr spam.fctr.predict.Conditional.X.cp.0.rpart.N
## 1 N 1225
## 2 Y 58
## spam.fctr.predict.Conditional.X.cp.0.rpart.Y
## 1 83
## 2 352
## Prediction
## Reference N Y
## N 1225 83
## Y 58 352
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.179278e-01 7.787801e-01 9.039320e-01 9.304754e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 9.168553e-65 4.326273e-02
## model_id model_method
## 1 Conditional.X.cp.0.rpart rpart
## feats
## 1 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 0 3.582 3.012
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9730402 0.4 0.8827727 0.9426434
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.9349933 0.9496402 0.844834 0.9340158
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.3 0.8331361 0.9179278
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.903932 0.9304754 0.7787801
## [1] "fitting model: Conditional.X.rf"
## [1] " indep_vars: T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, 
T.phone, T.director, T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank"
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## + : mtry= 2
## - : mtry= 2
## + : mtry=164
## - : mtry=164
## + : mtry=327
## - : mtry=327
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 164 on full training set
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 4010 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 8020 matrix numeric
## oob.times 4010 -none- numeric
## classes 2 -none- character
## importance 327 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 4010 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 327 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 2843 0
## Y 209 958
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 2843
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 209
## 2 958
## Reference
## Prediction N Y
## N 3020 0
## Y 32 958
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3020
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 32
## 2 958
## Reference
## Prediction N Y
## N 3047 0
## Y 5 958
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3047
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 5
## 2 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 958
## Reference
## Prediction N Y
## N 3052 29
## Y 0 929
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y 29
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 929
## Reference
## Prediction N Y
## N 3052 92
## Y 0 866
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y 92
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 866
## Reference
## Prediction N Y
## N 3052 231
## Y 0 727
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y 231
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 727
## Reference
## Prediction N Y
## N 3052 741
## Y 0 217
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y 741
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 217
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.9016471
## 3 0.2 0.9835729
## 4 0.3 0.9973972
## 5 0.4 1.0000000
## 6 0.5 1.0000000
## 7 0.6 1.0000000
## 8 0.7 0.9846317
## 9 0.8 0.9495614
## 10 0.9 0.8629080
## 11 1.0 0.3693617
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y NA
## spam.fctr.predict.Conditional.X.rf.Y
## 1 NA
## 2 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 3052
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 958
## Prediction
## Reference N Y
## N 3052 0
## Y 0 958
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.0000000 1.0000000 0.9990805 1.0000000 0.7610973
## AccuracyPValue McnemarPValue
## 0.0000000 NaN
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.rf.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 1079 5
## Y 229 405
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1079
## 2 Y 5
## spam.fctr.predict.Conditional.X.rf.Y
## 1 229
## 2 405
## Reference
## Prediction N Y
## N 1193 12
## Y 115 398
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1193
## 2 Y 12
## spam.fctr.predict.Conditional.X.rf.Y
## 1 115
## 2 398
## Reference
## Prediction N Y
## N 1246 22
## Y 62 388
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1246
## 2 Y 22
## spam.fctr.predict.Conditional.X.rf.Y
## 1 62
## 2 388
## Reference
## Prediction N Y
## N 1275 39
## Y 33 371
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1275
## 2 Y 39
## spam.fctr.predict.Conditional.X.rf.Y
## 1 33
## 2 371
## Reference
## Prediction N Y
## N 1293 58
## Y 15 352
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1293
## 2 Y 58
## spam.fctr.predict.Conditional.X.rf.Y
## 1 15
## 2 352
## Reference
## Prediction N Y
## N 1300 79
## Y 8 331
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1300
## 2 Y 79
## spam.fctr.predict.Conditional.X.rf.Y
## 1 8
## 2 331
## Reference
## Prediction N Y
## N 1302 110
## Y 6 300
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1302
## 2 Y 110
## spam.fctr.predict.Conditional.X.rf.Y
## 1 6
## 2 300
## Reference
## Prediction N Y
## N 1304 141
## Y 4 269
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1304
## 2 Y 141
## spam.fctr.predict.Conditional.X.rf.Y
## 1 4
## 2 269
## Reference
## Prediction N Y
## N 1308 191
## Y 0 219
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1308
## 2 Y 191
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 219
## Reference
## Prediction N Y
## N 1308 342
## Y 0 68
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1308
## 2 Y 342
## spam.fctr.predict.Conditional.X.rf.Y
## 1 0
## 2 68
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.7758621
## 3 0.2 0.8624052
## 4 0.3 0.9023256
## 5 0.4 0.9115479
## 6 0.5 0.9060489
## 7 0.6 0.8838451
## 8 0.7 0.8379888
## 9 0.8 0.7877013
## 10 0.9 0.6963434
## 11 1.0 0.2845188
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1275
## 2 Y 39
## spam.fctr.predict.Conditional.X.rf.Y
## 1 33
## 2 371
## Reference
## Prediction N Y
## N 1275 39
## Y 33 371
## spam.fctr spam.fctr.predict.Conditional.X.rf.N
## 1 N 1275
## 2 Y 39
## spam.fctr.predict.Conditional.X.rf.Y
## 1 33
## 2 371
## Prediction
## Reference N Y
## N 1275 33
## Y 39 371
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.580908e-01 8.840899e-01 9.475110e-01 9.670667e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 6.783387e-112 5.556898e-01
## model_id model_method
## 1 Conditional.X.rf rf
## feats
## 1 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, 
T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 519.274 119.301
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.6 1 0.9608479
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.9990805 1 0.8899334 0.9888221
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.4 0.9115479 0.9580908
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.947511 0.9670667 0.8840899
## [1] "fitting model: Conditional.X.no.rnorm.rf"
## [1] " indep_vars: T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, 
T.director, T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank"
## + : mtry= 2
## - : mtry= 2
## + : mtry=164
## - : mtry=164
## + : mtry=326
## - : mtry=326
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 164 on full training set
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 4010 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 8020 matrix numeric
## oob.times 4010 -none- numeric
## classes 2 -none- character
## importance 326 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 4010 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 326 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 3052
## 2 958
## Reference
## Prediction N Y
## N 2858 0
## Y 194 958
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 2858
## 2 Y 0
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 194
## 2 958
## Reference
## Prediction N Y
## N 3017 0
## Y 35 958
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3017
## 2 Y 0
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 35
## 2 958
## Reference
## Prediction N Y
## N 3048 0
## Y 4 958
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3048
## 2 Y 0
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 4
## 2 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y 0
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y 0
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 958
## Reference
## Prediction N Y
## N 3052 2
## Y 0 956
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y 2
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 956
## Reference
## Prediction N Y
## N 3052 26
## Y 0 932
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y 26
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 932
## Reference
## Prediction N Y
## N 3052 96
## Y 0 862
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y 96
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 862
## Reference
## Prediction N Y
## N 3052 230
## Y 0 728
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y 230
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 728
## Reference
## Prediction N Y
## N 3052 727
## Y 0 231
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y 727
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 231
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.9080569
## 3 0.2 0.9820605
## 4 0.3 0.9979167
## 5 0.4 1.0000000
## 6 0.5 1.0000000
## 7 0.6 0.9989551
## 8 0.7 0.9862434
## 9 0.8 0.9472527
## 10 0.9 0.8635824
## 11 1.0 0.3885618
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y NA
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 NA
## 2 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 3052
## 2 Y 0
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 958
## Prediction
## Reference N Y
## N 3052 0
## Y 0 958
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.0000000 1.0000000 0.9990805 1.0000000 0.7610973
## AccuracyPValue McnemarPValue
## 0.0000000 NaN
## Reference
## Prediction N Y
## N 0 0
## Y 1308 410
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 0
## 2 Y 0
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 1308
## 2 410
## Reference
## Prediction N Y
## N 1072 4
## Y 236 406
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1072
## 2 Y 4
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 236
## 2 406
## Reference
## Prediction N Y
## N 1200 13
## Y 108 397
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1200
## 2 Y 13
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 108
## 2 397
## Reference
## Prediction N Y
## N 1248 24
## Y 60 386
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1248
## 2 Y 24
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 60
## 2 386
## Reference
## Prediction N Y
## N 1277 43
## Y 31 367
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1277
## 2 Y 43
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 31
## 2 367
## Reference
## Prediction N Y
## N 1292 58
## Y 16 352
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1292
## 2 Y 58
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 16
## 2 352
## Reference
## Prediction N Y
## N 1299 77
## Y 9 333
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1299
## 2 Y 77
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 9
## 2 333
## Reference
## Prediction N Y
## N 1301 108
## Y 7 302
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1301
## 2 Y 108
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 7
## 2 302
## Reference
## Prediction N Y
## N 1304 136
## Y 4 274
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1304
## 2 Y 136
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 4
## 2 274
## Reference
## Prediction N Y
## N 1307 188
## Y 1 222
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1307
## 2 Y 188
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 1
## 2 222
## Reference
## Prediction N Y
## N 1308 326
## Y 0 84
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1308
## 2 Y 326
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 0
## 2 84
## threshold f.score
## 1 0.0 0.3853383
## 2 0.1 0.7718631
## 3 0.2 0.8677596
## 4 0.3 0.9018692
## 5 0.4 0.9084158
## 6 0.5 0.9048843
## 7 0.6 0.8856383
## 8 0.7 0.8400556
## 9 0.8 0.7965116
## 10 0.9 0.7014218
## 11 1.0 0.3400810
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.OOB"
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1277
## 2 Y 43
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 31
## 2 367
## Reference
## Prediction N Y
## N 1277 43
## Y 31 367
## spam.fctr spam.fctr.predict.Conditional.X.no.rnorm.rf.N
## 1 N 1277
## 2 Y 43
## spam.fctr.predict.Conditional.X.no.rnorm.rf.Y
## 1 31
## 2 367
## Prediction
## Reference N Y
## N 1277 31
## Y 43 367
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.569267e-01 8.802657e-01 9.462249e-01 9.660295e-01 7.613504e-01
## AccuracyPValue McnemarPValue
## 3.356457e-110 2.009943e-01
## model_id model_method
## 1 Conditional.X.no.rnorm.rf rf
## feats
## 1 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 503.114 115.456
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.5 1 0.9613466
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.9990805 1 0.8912559 0.9882356
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.4 0.9084158 0.9569267
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.9462249 0.9660295 0.8802657
# User specified
# easier to exclude features
#model_id_pfx <- "";
# indep_vars_vctr <- setdiff(names(glb_trnent_df),
# union(union(glb_rsp_var, glb_exclude_vars_as_features),
# c("<feat1_name>", "<feat2_name>")))
# method <- ""
# easier to include features
#model_id_pfx <- ""; indep_vars_vctr <- c("<feat1_name>", "<feat2_name>"); method <- ""
# User specified bivariate models
# indep_vars_vctr_lst <- list()
# for (feat in setdiff(names(glb_trnent_df),
# union(glb_rsp_var, glb_exclude_vars_as_features)))
# indep_vars_vctr_lst[[feat]] <- feat
# User specified combinatorial models
# indep_vars_vctr_lst <- list()
# combn_mtrx <- combn(c("<feat1_name>", "<feat2_name>", "<featn_name>"),
# <num_feats_to_choose>)
# for (combn_ix in 1:ncol(combn_mtrx))
# #print(combn_mtrx[, combn_ix])
# indep_vars_vctr_lst[[combn_ix]] <- combn_mtrx[, combn_ix]
# template for myfit_mdl
# rf is hard-coded in caret to recognize only Accuracy / Kappa evaluation metrics
# only for OOB in trainControl ?
# ret_lst <- myfit_mdl_fn(model_id=paste0(model_id_pfx, ""), model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
# fit_df=glb_trnent_df, OOB_df=glb_newent_df,
# n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df,
# model_loss_mtrx=glb_model_metric_terms,
# model_summaryFunction=glb_model_metric_smmry,
# model_metric=glb_model_metric,
# model_metric_maximize=glb_model_metric_maximize)
# Simplify a model
# fit_df <- glb_trnent_df; glb_mdl <- step(<complex>_mdl)
# Non-caret models
# rpart_area_mdl <- rpart(reformulate("Area", response=glb_rsp_var),
# data=glb_trnent_df, #method="class",
# control=rpart.control(cp=0.12),
# parms=list(loss=glb_model_metric_terms))
# print("rpart_sel_wlm_mdl"); prp(rpart_sel_wlm_mdl)
#
# Display the summary table of all models fitted so far: one row per model_id
# with its method, feature list, tuning/elapsed-time info and fit/OOB metrics.
print(glb_models_df)
## model_id model_method
## 1 MFO.myMFO_classfr myMFO_classfr
## 2 Random.myrandom_classfr myrandom_classfr
## 3 Max.cor.Y.cv.0.rpart rpart
## 4 Max.cor.Y.cv.0.cp.0.rpart rpart
## 5 Max.cor.Y.rpart rpart
## 6 Max.cor.Y.glm glm
## 7 Interact.High.cor.Y.glm glm
## 8 Low.cor.X.glm glm
## 9 Conditional.X.glm glm
## 10 Conditional.X.rpart rpart
## 11 Conditional.X.cp.0.rpart rpart
## 12 Conditional.X.rf rf
## 13 Conditional.X.no.rnorm.rf rf
## feats
## 1 .rnorm
## 2 .rnorm
## 3 T.vinc
## 4 T.vinc
## 5 T.vinc
## 6 T.vinc
## 7 T.vinc, T.vinc:T.report, T.vinc:T.www, T.vinc:T.num.words.unq, T.vinc:T.num.words, T.vinc:T.hou, T.vinc:T.know
## 8 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, T.two, T.point, 
T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.research, T.pleas, T.forward, T.X2000, T.subject, T.thank
## 9 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, 
T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## 10 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## 11 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## 12 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, 
T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## 13 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 0 0.368 0.003
## 2 0 0.255 0.001
## 3 0 0.629 0.042
## 4 0 0.498 0.043
## 5 1 1.060 0.043
## 6 1 1.146 0.111
## 7 1 1.381 0.181
## 8 1 46.686 16.432
## 9 1 51.673 17.404
## 10 3 17.521 3.007
## 11 0 3.582 3.012
## 12 3 519.274 119.301
## 13 3 503.114 115.456
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5000000 0.5 0.0000000 0.7610973
## 2 0.4979065 0.2 0.3856683 0.2389027
## 3 0.5000000 0.5 0.0000000 0.7610973
## 4 0.5000000 0.5 0.0000000 0.7610973
## 5 0.5000000 0.5 0.0000000 0.7610974
## 6 0.8168414 0.4 0.6315096 0.7610974
## 7 0.8168414 0.4 0.6315096 0.7610974
## 8 0.9688397 0.9 0.9351433 0.9149608
## 9 0.9573851 0.9 0.9494080 0.9189498
## 10 0.8739962 0.3 0.6554028 0.8725535
## 11 0.9730402 0.4 0.8827727 0.9426434
## 12 1.0000000 0.6 1.0000000 0.9608479
## 13 1.0000000 0.5 1.0000000 0.9613466
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7475872 0.7742259 0.0000000 0.5000000
## 2 0.2257741 0.2524128 0.0000000 0.5049414
## 3 0.7475872 0.7742259 0.0000000 0.5000000
## 4 0.7475872 0.7742259 0.0000000 0.5000000
## 5 0.7475872 0.7742259 0.0000000 0.5000000
## 6 0.7070358 0.7350353 0.0000000 0.8264955
## 7 0.7070358 0.7350353 0.0000000 0.8264284
## 8 0.9618923 0.9730733 0.7621305 0.9312318
## 9 0.9713892 0.9810164 0.7844885 0.9046767
## 10 0.7681753 0.7940070 0.6153166 0.8629279
## 11 0.9349933 0.9496402 0.8448340 0.9340158
## 12 0.9990805 1.0000000 0.8899334 0.9888221
## 13 0.9990805 1.0000000 0.8912559 0.9882356
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0.0000000 0.7613504
## 2 0.2 0.3853383 0.2386496
## 3 0.5 0.0000000 0.7613504
## 4 0.5 0.0000000 0.7613504
## 5 0.5 0.0000000 0.7613504
## 6 0.4 0.6435877 0.7363213
## 7 0.4 0.6435877 0.7363213
## 8 0.9 0.8712644 0.9348079
## 9 0.9 0.8657465 0.9377183
## 10 0.3 0.6528404 0.7759022
## 11 0.3 0.8331361 0.9179278
## 12 0.4 0.9115479 0.9580908
## 13 0.4 0.9084158 0.9569267
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7404667 0.7813376 0.0000000
## 2 0.2186624 0.2595333 0.0000000
## 3 0.7404667 0.7813376 0.0000000
## 4 0.7404667 0.7813376 0.0000000
## 5 0.7404667 0.7813376 0.0000000
## 6 0.7148024 0.7570300 0.4732836
## 7 0.7148024 0.7570300 0.4732836
## 8 0.9220828 0.9460215 0.8278095
## 9 0.9252328 0.9486811 0.8252448
## 10 0.7554310 0.7954265 0.5034615
## 11 0.9039320 0.9304754 0.7787801
## 12 0.9475110 0.9670667 0.8840899
## 13 0.9462249 0.9660295 0.8802657
## max.AccuracySD.fit max.KappaSD.fit min.aic.fit
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 0.0001952373 0.00000000 NA
## 6 0.0002254406 0.00000000 2869.603
## 7 0.0002254406 0.00000000 2881.603
## 8 0.0060855881 0.02245497 9943.263
## 9 0.0048766596 0.01175917 7432.207
## 10 0.0273811305 0.08595926 NA
## 11 NA NA NA
## 12 NA NA NA
## 13 NA NA NA
# Log the end of this "fit.models" step in the script-progress table: reuse the
# major step number of the most recent entry, bump its minor step by one and
# stamp the row with the seconds elapsed since glb_script_tm.
# local() keeps the scratch variables out of the global workspace.
glb_script_df <- local({
    last_step <- glb_script_df[nrow(glb_script_df),
                               c("chunk_step_major", "chunk_step_minor")]
    # Keep the "elapsed" name on the value: data.frame() derives the row name
    # of the new entry from it.
    elapsed_now <- (proc.time() - glb_script_tm)["elapsed"]
    new_entry <- data.frame(
        chunk_label="fit.models",
        chunk_step_major=last_step[["chunk_step_major"]],
        chunk_step_minor=last_step[["chunk_step_minor"]] + 1,
        elapsed=elapsed_now)
    rbind(glb_script_df, new_entry)
})
# Show the previous and the newly appended progress entries
print(tail(glb_script_df, n=2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed9 fit.models 5 1 173.248
## elapsed10 fit.models 5 2 1302.645
# When glb_model_metric_smmry is set (not NULL), re-score every model in
# glb_models_lst on the training data (glb_trnent_df, label "fit") and on the
# new data (glb_newent_df, label "OOB") via mypredict_mdl(..., ret_type="stats"),
# then replace the glb_model_metric columns of glb_models_df with the
# recomputed values. All intermediate data frames are left in the workspace.
if (!is.null(glb_model_metric_smmry)) {
# Third positional argument is drop=FALSE: keep a one-column data frame of ids
stats_df <- glb_models_df[, "model_id", FALSE]
stats_mdl_df <- data.frame()
# Accumulate the "fit" stats returned for each model
# (presumably one row per model_id -- TODO confirm in mypredict_mdl)
for (model_id in stats_df$model_id) {
stats_mdl_df <- rbind(stats_mdl_df,
mypredict_mdl(glb_models_lst[[model_id]], glb_trnent_df, glb_rsp_var,
glb_rsp_var_out, model_id, "fit",
glb_model_metric_smmry, glb_model_metric,
glb_model_metric_maximize, ret_type="stats"))
}
# No `by` given: merge() joins on every column name the two frames share
# NOTE(review): presumably that is only model_id -- confirm
stats_df <- merge(stats_df, stats_mdl_df, all.x=TRUE)
stats_mdl_df <- data.frame()
# Same accumulation for the "OOB" stats computed on glb_newent_df
for (model_id in stats_df$model_id) {
stats_mdl_df <- rbind(stats_mdl_df,
mypredict_mdl(glb_models_lst[[model_id]], glb_newent_df, glb_rsp_var,
glb_rsp_var_out, model_id, "OOB",
glb_model_metric_smmry, glb_model_metric,
glb_model_metric_maximize, ret_type="stats"))
}
stats_df <- merge(stats_df, stats_mdl_df, all.x=TRUE)
# tmp_models_df <- orderBy(~model_id, glb_models_df)
# rownames(tmp_models_df) <- seq(1, nrow(tmp_models_df))
# all.equal(subset(tmp_models_df[, names(stats_df)], model_id != "Random.myrandom_classfr"),
# subset(stats_df, model_id != "Random.myrandom_classfr"))
# print(subset(tmp_models_df[, names(stats_df)], model_id != "Random.myrandom_classfr")[, c("model_id", "max.Accuracy.fit")])
# print(subset(stats_df, model_id != "Random.myrandom_classfr")[, c("model_id", "max.Accuracy.fit")])
print("Merging following data into glb_models_df:")
# Keep column 1 plus every column whose name matches glb_model_metric
# (the metric name is used as a regular expression here)
# NOTE(review): column 1 is presumably model_id, the merge key -- confirm
print(stats_mrg_df <- stats_df[, c(1, grep(glb_model_metric, names(stats_df)))])
# For comparison, print the values currently stored in glb_models_df for the
# same metric columns, ordered by model_id; the column names are taken from
# stats_df, so they must already exist in glb_models_df
print(tmp_models_df <- orderBy(~model_id, glb_models_df[, c("model_id", grep(glb_model_metric, names(stats_df), value=TRUE))]))
# Everything except the metric columns. "model_id" is named explicitly and is
# also part of the setdiff() result, so it is selected twice; data frame
# indexing names the second copy "model_id.1", which is removed again below
tmp2_models_df <- glb_models_df[, c("model_id", setdiff(names(glb_models_df), grep(glb_model_metric, names(stats_df), value=TRUE)))]
# sort=FALSE: do not re-sort the result on the merge key
tmp3_models_df <- merge(tmp2_models_df, stats_mrg_df, all.x=TRUE, sort=FALSE)
print(tmp3_models_df)
print(names(tmp3_models_df))
# Drop the duplicated id column and store the refreshed table as glb_models_df
print(glb_models_df <- subset(tmp3_models_df, select=-model_id.1))
}
# Build the plotting view of glb_models_df:
#   1. leave out the spread/interval columns (names containing SD, Upper, Lower);
#   2. replace every "min.<stat>" column by its reciprocal "inv.<stat>", which
#      is appended as the last column.
# Columns are selected with logical masks rather than -grep(): a negative index
# built from an empty grep() result is integer(0) and would drop ALL columns.
# drop=FALSE keeps a data frame even if a single column is left.
plt_models_df <- glb_models_df[, !grepl("SD|Upper|Lower", names(glb_models_df)),
                               drop=FALSE]
# "^min\\." : literal dot, anchored at the start, so only true "min." prefixes
# are picked up and rewritten.
for (var in grep("^min\\.", names(plt_models_df), value=TRUE)) {
    # NA entries simply stay NA under division, no special-casing required
    plt_models_df[, sub("^min\\.", "inv.", var)] <- 1.0 / plt_models_df[, var]
    # Remove exactly this column; using `var` as a regular expression could
    # also remove other columns whose names happen to match it.
    plt_models_df <- plt_models_df[, names(plt_models_df) != var, drop=FALSE]
}
print(plt_models_df)
## model_id model_method
## 1 MFO.myMFO_classfr myMFO_classfr
## 2 Random.myrandom_classfr myrandom_classfr
## 3 Max.cor.Y.cv.0.rpart rpart
## 4 Max.cor.Y.cv.0.cp.0.rpart rpart
## 5 Max.cor.Y.rpart rpart
## 6 Max.cor.Y.glm glm
## 7 Interact.High.cor.Y.glm glm
## 8 Low.cor.X.glm glm
## 9 Conditional.X.glm glm
## 10 Conditional.X.rpart rpart
## 11 Conditional.X.cp.0.rpart rpart
## 12 Conditional.X.rf rf
## 13 Conditional.X.no.rnorm.rf rf
## feats
## 1 .rnorm
## 2 .rnorm
## 3 T.vinc
## 4 T.vinc
## 5 T.vinc
## 6 T.vinc
## 7 T.vinc, T.vinc:T.report, T.vinc:T.www, T.vinc:T.num.words.unq, T.vinc:T.num.words, T.vinc:T.hou, T.vinc:T.know
## 8 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, T.two, T.point, 
T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.research, T.pleas, T.forward, T.X2000, T.subject, T.thank
## 9 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, 
T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## 10 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## 11 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## 12 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, .rnorm, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, 
T.help, T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## 13 T.click, T.life, T.remov, T.websit, T.money, T.now, T.onlin, T.receiv, T.softwar, T.invest, T.without, T.secur, T.has.http, T.offer, T.special, T.free, T.within, T.account, T.net, T.just, T.compani, T.wish, T.custom, T.busi, T.mail, T.right, T.futur, T.email, T.site, T.address, T.home, T.internet, T.X000, T.effect, T.list, T.made, T.success, T.order, T.result, T.buy, T.link, T.http, T.design, T.peopl, T.product, T.repli, T.line, T.thing, T.inform, T.info, T.immedi, T.make, T.way, T.report, T.start, T.believ, T.web, T.provid, T.mean, T.read, T.one, T.expect, T.per, T.full, T.return, T.version, T.high, T.get, T.don, T.www, T.best, T.today, T.name, T.send, T.even, T.mani, T.involv, T.form, T.servic, T.system, T.want, T.increas, T.creat, T.hour, T.corpor, T.check, T.say, T.engin, T.hello, T.keep, T.person, T.avail, T.month, T.real, T.program, T.write, T.place, T.import, T.complet, T.state, T.type, T.done, T.rate, T.financi, T.includ, T.industri, T.lot, T.sever, T.base, T.applic, T.due, T.realli, T.day, T.requir, T.messag, T.assist, T.take, T.allow, T.effort, T.tri, T.market, T.use, T.oper, T.first, T.cost, T.much, T.find, T.public, T.sure, T.valu, T.new, T.access, T.term, T.short, T.interest, T.unit, T.part, T.better, T.file, T.see, T.recent, T.chang, T.may, T.plan, T.event, T.member, T.intern, T.number, T.great, T.differ, T.specif, T.long, T.alreadi, T.good, T.visit, T.credit, T.relat, T.resourc, T.approv, T.opportun, T.look, T.bring, T.area, T.current, T.deal, T.num.words.unq, T.given, T.experi, T.price, T.move, T.put, T.posit, T.sorri, T.review, T.locat, T.anoth, T.continu, T.can, T.cours, T.sinc, T.contract, T.come, T.respons, T.sincer, T.end, T.year, T.time, T.project, T.com, T.addit, T.detail, T.might, T.process, T.open, T.trade, T.idea, T.feel, T.case, T.respond, T.gas, T.updat, T.either, T.communic, T.final, T.need, T.well, T.num.chars, T.num.words, T.support, T.join, T.give, T.power, T.juli, T.follow, T.back, T.direct, T.abl, T.phone, T.director, T.help, 
T.two, T.point, T.note, T.thought, T.book, T.offic, T.hear, T.comment, T.copi, T.associ, T.particip, T.etc, T.still, T.confirm, T.will, T.run, T.fax, T.work, T.origin, T.problem, T.contact, T.next., T.understand, T.howev, T.develop, T.mention, T.data, T.issu, T.sent, T.togeth, T.begin, T.team, T.deriv, T.present, T.happi, T.date, T.also, T.set, T.tuesday, T.mark, T.soon, T.last, T.request, T.april, T.risk, T.like, T.num.words.unq.log, T.invit, T.depart, T.think, T.analysi, T.possibl, T.week, T.school, T.london, T.robert, T.student, T.call, T.option, T.confer, T.dear, T.edu, T.wednesday, T.arrang, T.manag, T.ask, T.energi, T.financ, T.doc, T.suggest, T.friday, T.resum, T.thursday, T.john, T.attend, T.corp, T.morn, T.monday, T.univers, T.appreci, T.shall, T.interview, T.model, T.houston, T.question, T.meet, T.talk, T.num.words.log, T.num.chars.log, T.schedul, T.group, T.discuss, T.hou, T.know, T.hope, T.attach, T.X2001, T.regard, T.ect, T.research, T.pleas, T.forward, T.let, T.X2000, T.subject, T.thank
## max.nTuningRuns max.auc.fit opt.prob.threshold.fit max.f.score.fit
## 1 0 0.5000000 0.5 0.0000000
## 2 0 0.4979065 0.2 0.3856683
## 3 0 0.5000000 0.5 0.0000000
## 4 0 0.5000000 0.5 0.0000000
## 5 1 0.5000000 0.5 0.0000000
## 6 1 0.8168414 0.4 0.6315096
## 7 1 0.8168414 0.4 0.6315096
## 8 1 0.9688397 0.9 0.9351433
## 9 1 0.9573851 0.9 0.9494080
## 10 3 0.8739962 0.3 0.6554028
## 11 0 0.9730402 0.4 0.8827727
## 12 3 1.0000000 0.6 1.0000000
## 13 3 1.0000000 0.5 1.0000000
## max.Accuracy.fit max.Kappa.fit max.auc.OOB opt.prob.threshold.OOB
## 1 0.7610973 0.0000000 0.5000000 0.5
## 2 0.2389027 0.0000000 0.5049414 0.2
## 3 0.7610973 0.0000000 0.5000000 0.5
## 4 0.7610973 0.0000000 0.5000000 0.5
## 5 0.7610974 0.0000000 0.5000000 0.5
## 6 0.7610974 0.0000000 0.8264955 0.4
## 7 0.7610974 0.0000000 0.8264284 0.4
## 8 0.9149608 0.7621305 0.9312318 0.9
## 9 0.9189498 0.7844885 0.9046767 0.9
## 10 0.8725535 0.6153166 0.8629279 0.3
## 11 0.9426434 0.8448340 0.9340158 0.3
## 12 0.9608479 0.8899334 0.9888221 0.4
## 13 0.9613466 0.8912559 0.9882356 0.4
## max.f.score.OOB max.Accuracy.OOB max.Kappa.OOB
## 1 0.0000000 0.7613504 0.0000000
## 2 0.3853383 0.2386496 0.0000000
## 3 0.0000000 0.7613504 0.0000000
## 4 0.0000000 0.7613504 0.0000000
## 5 0.0000000 0.7613504 0.0000000
## 6 0.6435877 0.7363213 0.4732836
## 7 0.6435877 0.7363213 0.4732836
## 8 0.8712644 0.9348079 0.8278095
## 9 0.8657465 0.9377183 0.8252448
## 10 0.6528404 0.7759022 0.5034615
## 11 0.8331361 0.9179278 0.7787801
## 12 0.9115479 0.9580908 0.8840899
## 13 0.9084158 0.9569267 0.8802657
## inv.elapsedtime.everything inv.elapsedtime.final inv.aic.fit
## 1 2.717391304 3.333333e+02 NA
## 2 3.921568627 1.000000e+03 NA
## 3 1.589825119 2.380952e+01 NA
## 4 2.008032129 2.325581e+01 NA
## 5 0.943396226 2.325581e+01 NA
## 6 0.872600349 9.009009e+00 0.0003484802
## 7 0.724112962 5.524862e+00 0.0003470290
## 8 0.021419698 6.085686e-02 0.0001005706
## 9 0.019352466 5.745806e-02 0.0001345495
## 10 0.057074368 3.325574e-01 NA
## 11 0.279173646 3.320053e-01 NA
## 12 0.001925766 8.382159e-03 NA
## 13 0.001987621 8.661308e-03 NA
# Print whatever myplot_radar() returns for the per-model metrics in plt_models_df
# (presumably a ggplot radar chart, given the warnings it emits -- confirm in myplot.R)
print(myplot_radar(radar_inp_df=plt_models_df))
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 13. Consider specifying shapes manually. if you must have them.
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 104 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_text).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 13. Consider specifying shapes manually. if you must have them.
# print(myplot_radar(radar_inp_df=subset(plt_models_df,
# !(model_id %in% grep("random|MFO", plt_models_df$model_id, value=TRUE)))))
# Derive Upper / Lower bounds for every metric that has a <metric>SD column
glb_models_df <- mutate(glb_models_df,
    # degrees of freedom for the t quantile; NA unless there is more than one tuning run
    max.df = ifelse(max.nTuningRuns > 1, max.nTuningRuns - 1, NA),
    # 97.5% t quantile used to scale the SD; NA when max.df is NA
    min.sd2ci.scaler = ifelse(is.na(max.df), NA, qt(0.975, max.df)))
for (var in grep("SD", names(glb_models_df), value=TRUE)) {
    # "<prefix>SD<suffix>" -> metric "<prefix><suffix>" and its Upper / Lower columns
    var_components <- unlist(strsplit(var, "SD"))
    varActul <- paste0(var_components[1], var_components[2])
    varUpper <- paste0(var_components[1], "Upper", var_components[2])
    varLower <- paste0(var_components[1], "Lower", var_components[2])
    if (!(varUpper %in% names(glb_models_df))) {
        print(sprintf("var:%s", var))
        # bound = metric +/- (t quantile with df = n - 1) * SD
        glb_models_df[[varUpper]] <- glb_models_df[, varActul] +
            glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
        glb_models_df[[varLower]] <- glb_models_df[, varActul] -
            glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
    } else {
        # CI already exists for this metric; the Lower column is assumed to exist too
        warning(varUpper, " already exists in glb_models_df")
    }
}
## Warning: max.AccuracyUpper.fit already exists in glb_models_df
## [1] "var:max.KappaSD.fit"
# Gather the metrics that have CI bounds:
#   plt_models_df   <- model_id + the point-estimate columns
#   pltCI_models_df <- model_id + the matching Upper / Lower columns
plt_models_df <- glb_models_df[, "model_id", drop=FALSE]
pltCI_models_df <- glb_models_df[, "model_id", drop=FALSE]
for (var in grep("Upper", names(glb_models_df), value=TRUE)) {
    var_components <- unlist(strsplit(var, "Upper"))
    # metric column name = the pieces around "Upper" glued back together
    col_name <- paste(var_components, collapse="")
    plt_models_df[[col_name]] <- glb_models_df[, col_name]
    for (name in paste0(var_components[1], c("Upper", "Lower"), var_components[2])) {
        pltCI_models_df[[name]] <- glb_models_df[, name]
    }
}
# Melt a wide per-model metrics data frame into long format and split every
# metric column name into two parts.
#
# Args:
#   plt_models_df: data frame with a "model_id" column plus one column per
#                  metric, named "<label>.<data>" (e.g. "max.Accuracy.fit").
# Returns:
#   The melted data frame (id.vars = "model_id") with two extra character columns:
#     data  - the last dot-separated component of the column name
#     label - everything before that last component; the whole name when it
#             contains no dot
#
# The name is split on literal dots. The previous version used
# paste0(".", data) as a regular expression, where "." matches any character,
# so e.g. "max.profit.fit" was labelled "max.pr". It also iterated over
# 1:nrow(), which fails for an empty data frame.
build_statsCI_data <- function(plt_models_df) {
    mltd_models_df <- melt(plt_models_df, id.vars="model_id")
    var_names <- as.character(mltd_models_df$variable)
    name_parts <- strsplit(var_names, ".", fixed=TRUE)

    mltd_models_df$data <- vapply(name_parts, function(parts) {
        if (length(parts) > 0) parts[length(parts)] else ""
    }, character(1))

    mltd_models_df$label <- vapply(seq_along(name_parts), function(ix) {
        parts <- name_parts[[ix]]
        if (length(parts) > 1) {
            paste(parts[-length(parts)], collapse=".")
        } else {
            # no dot to split on: the label is the name itself
            var_names[ix]
        }
    }, character(1))
    #print(mltd_models_df)
    return(mltd_models_df)
}
# Long-format point estimates, with the data / label columns added by build_statsCI_data
mltd_models_df <- build_statsCI_data(plt_models_df)
# Long-format CI bounds; each value of "variable" is a column name of pltCI_models_df,
# i.e. "<label>Upper<suffix>" or "<label>Lower<suffix>"
mltdCI_models_df <- melt(pltCI_models_df, id.vars="model_id")
# Tag every CI row with its label, data and bound type.
# seq_len() instead of 1:nrow(): with zero rows 1:0 would iterate over rows 1 and 0.
for (row_ix in seq_len(nrow(mltdCI_models_df))) {
    for (type in c("Upper", "Lower")) {
        # splitting on the bound type yields two pieces only for the matching type
        if (length(var_components <- unlist(strsplit(
            as.character(mltdCI_models_df[row_ix, "variable"]), type))) > 1) {
            #print(sprintf("row_ix:%d; type:%s; ", row_ix, type))
            mltdCI_models_df[row_ix, "label"] <- var_components[1]
            # second piece is split on "."; its second element is stored as data
            # (for a suffix like ".fit" the first element is the empty string)
            mltdCI_models_df[row_ix, "data"] <- unlist(strsplit(var_components[2], "[.]"))[2]
            mltdCI_models_df[row_ix, "type"] <- type
            break
        }
    }
}
#print(mltdCI_models_df)
# castCI_models_df <- dcast(mltdCI_models_df, value ~ type, fun.aggregate=sum)
# print(castCI_models_df)
# Pivot the CI rows so that each (model_id, label, data) combination becomes one row
# carrying value.Upper and value.Lower
wideCI_models_df <- reshape(
    mltdCI_models_df[, names(mltdCI_models_df) != "variable", drop=FALSE],
    idvar=setdiff(names(mltdCI_models_df), c("type", "value", "variable")),
    timevar="type",
    direction="wide")
#print(wideCI_models_df)
# Left join on the columns the two frames share by name: every CI row is kept
mrgdCI_models_df <- merge(wideCI_models_df, mltd_models_df, all.x=TRUE)
#print(mrgdCI_models_df)
# Merge stats back in if CIs don't exist:
# for every <label>.<data> combination that has no row in mltd_models_df yet,
# pull the plain metric column from glb_models_df and append it in long format.
goback_vars <- c()
# <label>.<data> pairs already present; computed once instead of on every iteration
present_vars <- unique(paste(mltd_models_df$label, mltd_models_df$data, sep="."))
for (var in unique(mltd_models_df$label)) {
    for (type in unique(mltd_models_df$data)) {
        var_type <- paste0(var, ".", type)
        # if this data is already present, next
        if (var_type %in% present_vars)
            next
        #print(sprintf("var_type:%s", var_type))
        goback_vars <- c(goback_vars, var_type)
    }
}
# The combinations above are synthesised names; keep only those that really are
# columns of glb_models_df, otherwise the column selection below stops with
# "undefined columns selected"
goback_vars <- intersect(goback_vars, names(glb_models_df))
if (length(goback_vars) > 0) {
    mltd_goback_df <- build_statsCI_data(glb_models_df[, c("model_id", goback_vars)])
    mltd_models_df <- rbind(mltd_models_df, mltd_goback_df)
}
# Attach model_method to every row (left join on the shared model_id column)
mltd_models_df <- merge(mltd_models_df, glb_models_df[, c("model_id", "model_method")], all.x=TRUE)
# Write the model-metrics bar chart to <glb_out_pfx>models_bar.png
png(paste0(glb_out_pfx, "models_bar.png"), width=480*3, height=480*2)
# Bars come from myplot_bar (value per model_id, coloured by model_method);
# error bars use the Upper / Lower CI values; facets: label in rows, data in columns
gp <- myplot_bar(mltd_models_df, "model_id", "value", colorcol_name="model_method") +
    geom_errorbar(data=mrgdCI_models_df,
                  mapping=aes(x=model_id, ymax=value.Upper, ymin=value.Lower), width=0.5) +
    facet_grid(label ~ data, scales="free") +
    theme(axis.text.x = element_text(angle = 90,vjust = 0.5))
print(gp)
dev.off()
## quartz_off_screen
## 2
# Print the same plot again, now that the PNG device has been closed by dev.off()
print(gp)
# used for console inspection
# Build the one-sided ordering formula used for model selection:
# a criterion whose name contains "max" is prefixed with "-", any other with "+"
model_evl_terms <- c(NULL)
for (metric in glb_model_evl_criteria) {
    model_evl_terms <- c(model_evl_terms,
                         if (grepl("max", metric)) "-" else "+",
                         metric)
}
model_sel_frmla <- as.formula(paste(c("~ ", model_evl_terms), collapse=" "))
# Rank the models by that formula and keep only the id and the criteria columns
tmp_models_df <- orderBy(model_sel_frmla, glb_models_df)[, c("model_id", glb_model_evl_criteria)]
print(tmp_models_df)
## model_id max.Accuracy.OOB max.Kappa.OOB min.aic.fit
## 12 Conditional.X.rf 0.9580908 0.8840899 NA
## 13 Conditional.X.no.rnorm.rf 0.9569267 0.8802657 NA
## 9 Conditional.X.glm 0.9377183 0.8252448 7432.207
## 8 Low.cor.X.glm 0.9348079 0.8278095 9943.263
## 11 Conditional.X.cp.0.rpart 0.9179278 0.7787801 NA
## 10 Conditional.X.rpart 0.7759022 0.5034615 NA
## 1 MFO.myMFO_classfr 0.7613504 0.0000000 NA
## 3 Max.cor.Y.cv.0.rpart 0.7613504 0.0000000 NA
## 4 Max.cor.Y.cv.0.cp.0.rpart 0.7613504 0.0000000 NA
## 5 Max.cor.Y.rpart 0.7613504 0.0000000 NA
## 6 Max.cor.Y.glm 0.7363213 0.4732836 2869.603
## 7 Interact.High.cor.Y.glm 0.7363213 0.4732836 2881.603
## 2 Random.myrandom_classfr 0.2386496 0.0000000 NA
# Print whatever myplot_radar() returns for the ranked selection-criteria table
print(myplot_radar(radar_inp_df=tmp_models_df))
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 13. Consider specifying shapes manually. if you must have them.
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 27 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_text).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 13. Consider specifying shapes manually. if you must have them.
# Show the ordering formula that ranked the models
print("Metrics used for model selection:"); print(model_sel_frmla)
## [1] "Metrics used for model selection:"
## ~-max.Accuracy.OOB - max.Kappa.OOB + min.aic.fit
# The first row of the ranked table is reported as the best model
print(sprintf("Best model id: %s", tmp_models_df[1, "model_id"]))
## [1] "Best model id: Conditional.X.rf"
# Keep a user-specified model id if there is one; otherwise take the top-ranked model
if (!is.null(glb_sel_mdl_id)) {
    print(sprintf("User specified selection: %s", glb_sel_mdl_id))
} else {
    glb_sel_mdl_id <- tmp_models_df[1, "model_id"]
}
# Look the selected model up by id and print it via myprint_mdl
glb_sel_mdl <- glb_models_lst[[glb_sel_mdl_id]]
myprint_mdl(glb_sel_mdl)
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 4010 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 8020 matrix numeric
## oob.times 4010 -none- numeric
## classes 2 -none- character
## importance 327 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 4010 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 327 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] TRUE
# Append "model.selected" to glb_analytics_avl_objs and pass the extended vector to
# replay.petrisim as replay.trans. The assignment is embedded in the argument, so it
# takes effect when replay.petrisim evaluates replay.trans.
replay.petrisim(pn=glb_analytics_pn,
replay.trans=(glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
"model.selected")), flip_coord=TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
# Append a row for the "fit.data.training.all" chunk to glb_script_df:
# next major step number, minor step 0, and the "elapsed" component of
# proc.time() - glb_script_tm
glb_script_df <- rbind(glb_script_df,
data.frame(chunk_label="fit.data.training.all",
chunk_step_major=max(glb_script_df$chunk_step_major)+1,
chunk_step_minor=0,
elapsed=(proc.time() - glb_script_tm)["elapsed"]))
# Show the two most recent rows
print(tail(glb_script_df, 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed10 fit.models 5 2 1302.645
## elapsed11 fit.data.training.all 6 0 1318.755
# 6: fit.data.training.all
# Determine the final model.
#   * If glb_fin_mdl_id is set and names an entry of glb_models_lst, nothing is
#     refitted and glb_sel_mdl becomes the final model.
#   * Otherwise a model with id "Final" is fitted on glb_trnent_df, reusing the
#     selected model's method, its extracted features and its best tuning values.
if (!is.null(glb_fin_mdl_id) && (glb_fin_mdl_id %in% names(glb_models_lst))) {
    warning("Final model same as user selected model")
    # NOTE(review): this takes glb_sel_mdl, not glb_models_lst[[glb_fin_mdl_id]] --
    # confirm that is intended when the two ids differ
    glb_fin_mdl <- glb_sel_mdl
} else {
    print(mdl_feats_df <- myextract_mdl_feats(sel_mdl=glb_sel_mdl, entity_df=glb_trnent_df))

    if ((model_method <- glb_sel_mdl$method) == "custom")
        # get actual method from the model_id
        model_method <- tail(unlist(strsplit(glb_sel_mdl_id, "[.]")), 1)

    # Pin every tuning parameter to the selected model's best value (min == max)
    tune_finmdl_df <- NULL
    # Guard against a missing bestTune: nrow(NULL) is NULL and `NULL > 0` makes
    # if() fail with "argument is of length zero"
    if (!is.null(glb_sel_mdl$bestTune) && nrow(glb_sel_mdl$bestTune) > 0) {
        for (param in names(glb_sel_mdl$bestTune)) {
            #print(sprintf("param: %s", param))
            tune_finmdl_df <- rbind(tune_finmdl_df,
                data.frame(parameter=param,
                           min=glb_sel_mdl$bestTune[1, param],
                           max=glb_sel_mdl$bestTune[1, param],
                           by=1)) # by val does not matter
        }
    }

    # Sync with parameters in mydsutils.R
    ret_lst <- myfit_mdl(model_id="Final", model_method=model_method,
                         indep_vars_vctr=mdl_feats_df$id, model_type=glb_model_type,
                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                         fit_df=glb_trnent_df, OOB_df=NULL,
                         n_cv_folds=glb_n_cv_folds, tune_models_df=tune_finmdl_df,
                         # Automate from here
                         # Issues if glb_sel_mdl$method == "rf" b/c trainControl is "oob"; not "cv"
                         model_loss_mtrx=glb_model_metric_terms,
                         model_summaryFunction=glb_sel_mdl$control$summaryFunction,
                         model_metric=glb_sel_mdl$metric,
                         model_metric_maximize=glb_sel_mdl$maximize)
    # The newly fitted model is taken to be the last element of glb_models_lst
    glb_fin_mdl <- glb_models_lst[[length(glb_models_lst)]]
    # NOTE(review): assumes row length(glb_models_lst) of glb_models_df describes
    # that same model, i.e. the data frame and the list share their order -- confirm
    glb_fin_mdl_id <- glb_models_df[length(glb_models_lst), "model_id"]
}
## importance id fit.feat
## T.thank 1.000000e+02 T.thank TRUE
## T.X2000 6.689411e+01 T.X2000 TRUE
## T.money 6.250465e+01 T.money TRUE
## T.life 5.126688e+01 T.life TRUE
## T.click 4.493832e+01 T.click TRUE
## T.ect 3.015914e+01 T.ect TRUE
## T.X2001 2.854909e+01 T.X2001 TRUE
## T.research 2.587504e+01 T.research TRUE
## T.remov 2.280134e+01 T.remov TRUE
## T.offer 2.097396e+01 T.offer TRUE
## T.softwar 1.371497e+01 T.softwar TRUE
## T.pleas 1.356197e+01 T.pleas TRUE
## T.attach 1.352692e+01 T.attach TRUE
## T.onlin 1.297577e+01 T.onlin TRUE
## T.custom 1.265012e+01 T.custom TRUE
## T.subject 1.158537e+01 T.subject TRUE
## T.account 1.085153e+01 T.account TRUE
## T.model 1.066658e+01 T.model TRUE
## T.websit 1.060672e+01 T.websit TRUE
## T.will 1.055103e+01 T.will TRUE
## T.num.chars 1.001925e+01 T.num.chars TRUE
## T.num.chars.log 9.462260e+00 T.num.chars.log TRUE
## T.num.words.log 8.428560e+00 T.num.words.log TRUE
## T.num.words 7.886814e+00 T.num.words TRUE
## T.com 7.761906e+00 T.com TRUE
## T.num.words.unq 7.654058e+00 T.num.words.unq TRUE
## T.num.words.unq.log 7.565707e+00 T.num.words.unq.log TRUE
## T.regard 7.494885e+00 T.regard TRUE
## T.energi 7.376932e+00 T.energi TRUE
## .rnorm 7.179686e+00 .rnorm TRUE
## T.meet 6.353011e+00 T.meet TRUE
## T.compani 6.350235e+00 T.compani TRUE
## T.net 6.253751e+00 T.net TRUE
## T.messag 6.013834e+00 T.messag TRUE
## T.hou 5.869402e+00 T.hou TRUE
## T.http 5.663867e+00 T.http TRUE
## T.houston 5.393488e+00 T.houston TRUE
## T.mail 5.337298e+00 T.mail TRUE
## T.free 4.868961e+00 T.free TRUE
## T.request 4.671665e+00 T.request TRUE
## T.has.http 4.526829e+00 T.has.http TRUE
## T.now 4.037841e+00 T.now TRUE
## T.email 3.497458e+00 T.email TRUE
## T.number 3.458594e+00 T.number TRUE
## T.date 3.239425e+00 T.date TRUE
## T.invest 3.085699e+00 T.invest TRUE
## T.resourc 3.065293e+00 T.resourc TRUE
## T.site 3.009378e+00 T.site TRUE
## T.group 2.969640e+00 T.group TRUE
## T.edu 2.916398e+00 T.edu TRUE
## T.hello 2.758850e+00 T.hello TRUE
## T.forward 2.624701e+00 T.forward TRUE
## T.secur 2.423016e+00 T.secur TRUE
## T.trade 2.328928e+00 T.trade TRUE
## T.inform 2.319230e+00 T.inform TRUE
## T.fax 2.303982e+00 T.fax TRUE
## T.one 2.285540e+00 T.one TRUE
## T.X000 2.179680e+00 T.X000 TRUE
## T.receiv 2.081486e+00 T.receiv TRUE
## T.rate 2.046663e+00 T.rate TRUE
## T.product 2.035776e+00 T.product TRUE
## T.hope 1.983694e+00 T.hope TRUE
## T.follow 1.981371e+00 T.follow TRUE
## T.review 1.972197e+00 T.review TRUE
## T.power 1.930133e+00 T.power TRUE
## T.price 1.907384e+00 T.price TRUE
## T.without 1.864040e+00 T.without TRUE
## T.link 1.859137e+00 T.link TRUE
## T.home 1.851725e+00 T.home TRUE
## T.corp 1.843015e+00 T.corp TRUE
## T.like 1.726876e+00 T.like TRUE
## T.univers 1.689843e+00 T.univers TRUE
## T.address 1.656006e+00 T.address TRUE
## T.send 1.643903e+00 T.send TRUE
## T.option 1.639626e+00 T.option TRUE
## T.discuss 1.619345e+00 T.discuss TRUE
## T.can 1.616885e+00 T.can TRUE
## T.manag 1.603780e+00 T.manag TRUE
## T.let 1.585432e+00 T.let TRUE
## T.thing 1.566172e+00 T.thing TRUE
## T.risk 1.500548e+00 T.risk TRUE
## T.get 1.497712e+00 T.get TRUE
## T.may 1.387503e+00 T.may TRUE
## T.better 1.354614e+00 T.better TRUE
## T.visit 1.347245e+00 T.visit TRUE
## T.www 1.335732e+00 T.www TRUE
## T.list 1.296802e+00 T.list TRUE
## T.info 1.261506e+00 T.info TRUE
## T.full 1.234350e+00 T.full TRUE
## T.repli 1.220439e+00 T.repli TRUE
## T.question 1.215960e+00 T.question TRUE
## T.schedul 1.215311e+00 T.schedul TRUE
## T.continu 1.189401e+00 T.continu TRUE
## T.creat 1.176633e+00 T.creat TRUE
## T.offic 1.176412e+00 T.offic TRUE
## T.file 1.169081e+00 T.file TRUE
## T.john 1.137297e+00 T.john TRUE
## T.year 1.118445e+00 T.year TRUE
## T.doc 1.114185e+00 T.doc TRUE
## T.work 1.107221e+00 T.work TRUE
## T.web 1.106349e+00 T.web TRUE
## T.line 1.092438e+00 T.line TRUE
## T.increas 1.088354e+00 T.increas TRUE
## T.call 1.079911e+00 T.call TRUE
## T.best 1.052366e+00 T.best TRUE
## T.wish 1.041196e+00 T.wish TRUE
## T.also 1.035667e+00 T.also TRUE
## T.differ 1.029793e+00 T.differ TRUE
## T.use 1.025830e+00 T.use TRUE
## T.happi 1.020172e+00 T.happi TRUE
## T.know 1.009709e+00 T.know TRUE
## T.want 1.007321e+00 T.want TRUE
## T.issu 9.984333e-01 T.issu TRUE
## T.set 9.785326e-01 T.set TRUE
## T.week 9.686986e-01 T.week TRUE
## T.busi 9.683363e-01 T.busi TRUE
## T.suggest 9.612003e-01 T.suggest TRUE
## T.state 9.595927e-01 T.state TRUE
## T.applic 9.259455e-01 T.applic TRUE
## T.help 9.116935e-01 T.help TRUE
## T.return 9.047085e-01 T.return TRUE
## T.talk 9.038415e-01 T.talk TRUE
## T.confer 9.018154e-01 T.confer TRUE
## T.updat 8.877595e-01 T.updat TRUE
## T.still 8.831260e-01 T.still TRUE
## T.gas 8.772081e-01 T.gas TRUE
## T.buy 8.617613e-01 T.buy TRUE
## T.just 8.560770e-01 T.just TRUE
## T.look 8.558153e-01 T.look TRUE
## T.per 8.555509e-01 T.per TRUE
## T.dear 8.457167e-01 T.dear TRUE
## T.tri 8.300783e-01 T.tri TRUE
## T.approv 8.163718e-01 T.approv TRUE
## T.new 8.088179e-01 T.new TRUE
## T.good 8.060782e-01 T.good TRUE
## T.deriv 8.055564e-01 T.deriv TRUE
## T.order 8.033586e-01 T.order TRUE
## T.time 7.975577e-01 T.time TRUE
## T.day 7.849974e-01 T.day TRUE
## T.within 7.651509e-01 T.within TRUE
## T.much 7.511282e-01 T.much TRUE
## T.market 7.462507e-01 T.market TRUE
## T.internet 7.461971e-01 T.internet TRUE
## T.deal 7.421217e-01 T.deal TRUE
## T.program 7.363667e-01 T.program TRUE
## T.right 7.345183e-01 T.right TRUE
## T.morn 7.234775e-01 T.morn TRUE
## T.effect 7.203853e-01 T.effect TRUE
## T.monday 7.174999e-01 T.monday TRUE
## T.feel 7.004453e-01 T.feel TRUE
## T.contact 6.980267e-01 T.contact TRUE
## T.area 6.934384e-01 T.area TRUE
## T.don 6.822978e-01 T.don TRUE
## T.director 6.810934e-01 T.director TRUE
## T.resum 6.772275e-01 T.resum TRUE
## T.special 6.595836e-01 T.special TRUE
## T.person 6.579987e-01 T.person TRUE
## T.depart 6.579457e-01 T.depart TRUE
## T.intern 6.556878e-01 T.intern TRUE
## T.provid 6.518019e-01 T.provid TRUE
## T.origin 6.401393e-01 T.origin TRUE
## T.futur 6.387394e-01 T.futur TRUE
## T.success 6.295702e-01 T.success TRUE
## T.keep 6.218667e-01 T.keep TRUE
## T.last 6.144210e-01 T.last TRUE
## T.well 6.142870e-01 T.well TRUE
## T.credit 6.089182e-01 T.credit TRUE
## T.contract 6.004442e-01 T.contract TRUE
## T.phone 5.996228e-01 T.phone TRUE
## T.servic 5.992704e-01 T.servic TRUE
## T.sorri 5.992020e-01 T.sorri TRUE
## T.interest 5.973595e-01 T.interest TRUE
## T.allow 5.926691e-01 T.allow TRUE
## T.avail 5.911924e-01 T.avail TRUE
## T.result 5.873975e-01 T.result TRUE
## T.financ 5.871416e-01 T.financ TRUE
## T.case 5.856659e-01 T.case TRUE
## T.chang 5.847512e-01 T.chang TRUE
## T.check 5.779024e-01 T.check TRUE
## T.anoth 5.738194e-01 T.anoth TRUE
## T.report 5.669554e-01 T.report TRUE
## T.make 5.623704e-01 T.make TRUE
## T.shall 5.564971e-01 T.shall TRUE
## T.find 5.505341e-01 T.find TRUE
## T.book 5.493366e-01 T.book TRUE
## T.need 5.333326e-01 T.need TRUE
## T.month 5.166041e-01 T.month TRUE
## T.high 5.111214e-01 T.high TRUE
## T.version 5.033756e-01 T.version TRUE
## T.take 4.959769e-01 T.take TRUE
## T.come 4.938262e-01 T.come TRUE
## T.hour 4.900744e-01 T.hour TRUE
## T.system 4.873297e-01 T.system TRUE
## T.write 4.861432e-01 T.write TRUE
## T.relat 4.827054e-01 T.relat TRUE
## T.form 4.825180e-01 T.form TRUE
## T.name 4.820021e-01 T.name TRUE
## T.addit 4.802578e-01 T.addit TRUE
## T.access 4.733360e-01 T.access TRUE
## T.way 4.728798e-01 T.way TRUE
## T.confirm 4.727883e-01 T.confirm TRUE
## T.abl 4.709950e-01 T.abl TRUE
## T.assist 4.700748e-01 T.assist TRUE
## T.project 4.665827e-01 T.project TRUE
## T.immedi 4.567287e-01 T.immedi TRUE
## T.next. 4.566919e-01 T.next. TRUE
## T.long 4.456010e-01 T.long TRUE
## T.import 4.423913e-01 T.import TRUE
## T.friday 4.355324e-01 T.friday TRUE
## T.posit 4.334479e-01 T.posit TRUE
## T.opportun 4.271382e-01 T.opportun TRUE
## T.etc 4.212680e-01 T.etc TRUE
## T.either 4.145171e-01 T.either TRUE
## T.give 4.142127e-01 T.give TRUE
## T.great 4.138503e-01 T.great TRUE
## T.analysi 4.098188e-01 T.analysi TRUE
## T.cours 4.057978e-01 T.cours TRUE
## T.corpor 4.027015e-01 T.corpor TRUE
## T.place 4.001472e-01 T.place TRUE
## T.copi 3.989613e-01 T.copi TRUE
## T.today 3.916975e-01 T.today TRUE
## T.mark 3.878037e-01 T.mark TRUE
## T.includ 3.862831e-01 T.includ TRUE
## T.process 3.836898e-01 T.process TRUE
## T.financi 3.833851e-01 T.financi TRUE
## T.see 3.833802e-01 T.see TRUE
## T.associ 3.779964e-01 T.associ TRUE
## T.specif 3.775749e-01 T.specif TRUE
## T.april 3.762540e-01 T.april TRUE
## T.mani 3.736878e-01 T.mani TRUE
## T.particip 3.708483e-01 T.particip TRUE
## T.two 3.674728e-01 T.two TRUE
## T.develop 3.660004e-01 T.develop TRUE
## T.sent 3.588444e-01 T.sent TRUE
## T.final 3.576982e-01 T.final TRUE
## T.ask 3.511722e-01 T.ask TRUE
## T.base 3.451920e-01 T.base TRUE
## T.engin 3.443701e-01 T.engin TRUE
## T.first 3.403618e-01 T.first TRUE
## T.sure 3.399761e-01 T.sure TRUE
## T.present 3.394727e-01 T.present TRUE
## T.even 3.337061e-01 T.even TRUE
## T.read 3.265823e-01 T.read TRUE
## T.type 3.187866e-01 T.type TRUE
## T.possibl 3.164406e-01 T.possibl TRUE
## T.real 3.145013e-01 T.real TRUE
## T.data 3.134264e-01 T.data TRUE
## T.locat 3.102122e-01 T.locat TRUE
## T.requir 3.018930e-01 T.requir TRUE
## T.think 3.000940e-01 T.think TRUE
## T.peopl 2.981611e-01 T.peopl TRUE
## T.interview 2.913032e-01 T.interview TRUE
## T.london 2.911988e-01 T.london TRUE
## T.sinc 2.901248e-01 T.sinc TRUE
## T.cost 2.823917e-01 T.cost TRUE
## T.run 2.784265e-01 T.run TRUE
## T.howev 2.781044e-01 T.howev TRUE
## T.valu 2.773974e-01 T.valu TRUE
## T.support 2.708595e-01 T.support TRUE
## T.industri 2.697252e-01 T.industri TRUE
## T.detail 2.566523e-01 T.detail TRUE
## T.recent 2.549628e-01 T.recent TRUE
## T.direct 2.530714e-01 T.direct TRUE
## T.design 2.528762e-01 T.design TRUE
## T.respond 2.506485e-01 T.respond TRUE
## T.put 2.474796e-01 T.put TRUE
## T.soon 2.455476e-01 T.soon TRUE
## T.join 2.435923e-01 T.join TRUE
## T.team 2.359660e-01 T.team TRUE
## T.member 2.351803e-01 T.member TRUE
## T.start 2.245813e-01 T.start TRUE
## T.made 2.245093e-01 T.made TRUE
## T.short 2.204747e-01 T.short TRUE
## T.oper 2.181002e-01 T.oper TRUE
## T.public 2.178799e-01 T.public TRUE
## T.student 2.171920e-01 T.student TRUE
## T.idea 2.147210e-01 T.idea TRUE
## T.current 2.105397e-01 T.current TRUE
## T.done 2.087072e-01 T.done TRUE
## T.alreadi 2.084540e-01 T.alreadi TRUE
## T.mention 2.063881e-01 T.mention TRUE
## T.expect 2.051477e-01 T.expect TRUE
## T.event 2.046307e-01 T.event TRUE
## T.respons 2.040739e-01 T.respons TRUE
## T.attend 2.003647e-01 T.attend TRUE
## T.believ 1.968593e-01 T.believ TRUE
## T.end 1.901914e-01 T.end TRUE
## T.part 1.898714e-01 T.part TRUE
## T.problem 1.870254e-01 T.problem TRUE
## T.lot 1.792625e-01 T.lot TRUE
## T.tuesday 1.698485e-01 T.tuesday TRUE
## T.unit 1.670443e-01 T.unit TRUE
## T.say 1.646166e-01 T.say TRUE
## T.complet 1.566649e-01 T.complet TRUE
## T.invit 1.552211e-01 T.invit TRUE
## T.sever 1.521443e-01 T.sever TRUE
## T.thought 1.483872e-01 T.thought TRUE
## T.note 1.476875e-01 T.note TRUE
## T.involv 1.442179e-01 T.involv TRUE
## T.plan 1.330683e-01 T.plan TRUE
## T.back 1.314980e-01 T.back TRUE
## T.juli 1.300655e-01 T.juli TRUE
## T.communic 1.261668e-01 T.communic TRUE
## T.realli 1.201482e-01 T.realli TRUE
## T.term 1.126035e-01 T.term TRUE
## T.might 1.092723e-01 T.might TRUE
## T.thursday 9.244350e-02 T.thursday TRUE
## T.point 8.802568e-02 T.point TRUE
## T.appreci 7.539842e-02 T.appreci TRUE
## T.bring 6.742378e-02 T.bring TRUE
## T.begin 6.695487e-02 T.begin TRUE
## T.hear 5.624557e-02 T.hear TRUE
## T.comment 5.516995e-02 T.comment TRUE
## T.mean 5.221234e-02 T.mean TRUE
## T.open 5.053360e-02 T.open TRUE
## T.wednesday 4.055439e-02 T.wednesday TRUE
## T.given 3.950516e-02 T.given TRUE
## T.robert 3.233001e-02 T.robert TRUE
## T.experi 2.033548e-02 T.experi TRUE
## T.move 1.838462e-02 T.move TRUE
## T.sincer 1.599377e-02 T.sincer TRUE
## T.understand 1.302343e-02 T.understand TRUE
## T.togeth 1.049728e-02 T.togeth TRUE
## T.effort 1.016733e-02 T.effort TRUE
## T.arrang 5.422132e-03 T.arrang TRUE
## T.school 4.660029e-03 T.school TRUE
## T.due 0.000000e+00 T.due TRUE
## [1] "fitting model: Final.rf"
## [1] " indep_vars: T.thank, T.X2000, T.money, T.life, T.click, T.ect, T.X2001, T.research, T.remov, T.offer, T.softwar, T.pleas, T.attach, T.onlin, T.custom, T.subject, T.account, T.model, T.websit, T.will, T.num.chars, T.num.chars.log, T.num.words.log, T.num.words, T.com, T.num.words.unq, T.num.words.unq.log, T.regard, T.energi, .rnorm, T.meet, T.compani, T.net, T.messag, T.hou, T.http, T.houston, T.mail, T.free, T.request, T.has.http, T.now, T.email, T.number, T.date, T.invest, T.resourc, T.site, T.group, T.edu, T.hello, T.forward, T.secur, T.trade, T.inform, T.fax, T.one, T.X000, T.receiv, T.rate, T.product, T.hope, T.follow, T.review, T.power, T.price, T.without, T.link, T.home, T.corp, T.like, T.univers, T.address, T.send, T.option, T.discuss, T.can, T.manag, T.let, T.thing, T.risk, T.get, T.may, T.better, T.visit, T.www, T.list, T.info, T.full, T.repli, T.question, T.schedul, T.continu, T.creat, T.offic, T.file, T.john, T.year, T.doc, T.work, T.web, T.line, T.increas, T.call, T.best, T.wish, T.also, T.differ, T.use, T.happi, T.know, T.want, T.issu, T.set, T.week, T.busi, T.suggest, T.state, T.applic, T.help, T.return, T.talk, T.confer, T.updat, T.still, T.gas, T.buy, T.just, T.look, T.per, T.dear, T.tri, T.approv, T.new, T.good, T.deriv, T.order, T.time, T.day, T.within, T.much, T.market, T.internet, T.deal, T.program, T.right, T.morn, T.effect, T.monday, T.feel, T.contact, T.area, T.don, T.director, T.resum, T.special, T.person, T.depart, T.intern, T.provid, T.origin, T.futur, T.success, T.keep, T.last, T.well, T.credit, T.contract, T.phone, T.servic, T.sorri, T.interest, T.allow, T.avail, T.result, T.financ, T.case, T.chang, T.check, T.anoth, T.report, T.make, T.shall, T.find, T.book, T.need, T.month, T.high, T.version, T.take, T.come, T.hour, T.system, T.write, T.relat, T.form, T.name, T.addit, T.access, T.way, T.confirm, T.abl, T.assist, T.project, T.immedi, T.next., T.long, T.import, T.friday, T.posit, T.opportun, T.etc, T.either, T.give, T.great, 
T.analysi, T.cours, T.corpor, T.place, T.copi, T.today, T.mark, T.includ, T.process, T.financi, T.see, T.associ, T.specif, T.april, T.mani, T.particip, T.two, T.develop, T.sent, T.final, T.ask, T.base, T.engin, T.first, T.sure, T.present, T.even, T.read, T.type, T.possibl, T.real, T.data, T.locat, T.requir, T.think, T.peopl, T.interview, T.london, T.sinc, T.cost, T.run, T.howev, T.valu, T.support, T.industri, T.detail, T.recent, T.direct, T.design, T.respond, T.put, T.soon, T.join, T.team, T.member, T.start, T.made, T.short, T.oper, T.public, T.student, T.idea, T.current, T.done, T.alreadi, T.mention, T.expect, T.event, T.respons, T.attend, T.believ, T.end, T.part, T.problem, T.lot, T.tuesday, T.unit, T.say, T.complet, T.invit, T.sever, T.thought, T.note, T.involv, T.plan, T.back, T.juli, T.communic, T.realli, T.term, T.might, T.thursday, T.point, T.appreci, T.bring, T.begin, T.hear, T.comment, T.mean, T.open, T.wednesday, T.given, T.robert, T.experi, T.move, T.sincer, T.understand, T.togeth, T.effort, T.arrang, T.school, T.due"
## + : mtry=164
## - : mtry=164
## Aggregating results
## Fitting final model on full training set
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 4010 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 8020 matrix numeric
## oob.times 4010 -none- numeric
## classes 2 -none- character
## importance 327 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 4010 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 327 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## Reference
## Prediction N Y
## N 0 0
## Y 3052 958
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 0 3052
## 2 Y 0 958
## Reference
## Prediction N Y
## N 2840 0
## Y 212 958
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 2840 212
## 2 Y 0 958
## Reference
## Prediction N Y
## N 3017 0
## Y 35 958
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3017 35
## 2 Y 0 958
## Reference
## Prediction N Y
## N 3047 0
## Y 5 958
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3047 5
## 2 Y 0 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 0
## 2 Y 0 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 0
## 2 Y 0 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 0
## 2 Y 0 958
## Reference
## Prediction N Y
## N 3052 28
## Y 0 930
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 0
## 2 Y 28 930
## Reference
## Prediction N Y
## N 3052 93
## Y 0 865
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 0
## 2 Y 93 865
## Reference
## Prediction N Y
## N 3052 230
## Y 0 728
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 0
## 2 Y 230 728
## Reference
## Prediction N Y
## N 3052 740
## Y 0 218
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 0
## 2 Y 740 218
## threshold f.score
## 1 0.0 0.3856683
## 2 0.1 0.9003759
## 3 0.2 0.9820605
## 4 0.3 0.9973972
## 5 0.4 1.0000000
## 6 0.5 1.0000000
## 7 0.6 1.0000000
## 8 0.7 0.9851695
## 9 0.8 0.9489852
## 10 0.9 0.8635824
## 11 1.0 0.3707483
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 NA
## 2 Y NA 958
## Reference
## Prediction N Y
## N 3052 0
## Y 0 958
## spam.fctr spam.fctr.predict.Final.rf.N spam.fctr.predict.Final.rf.Y
## 1 N 3052 0
## 2 Y 0 958
## Prediction
## Reference N Y
## N 3052 0
## Y 0 958
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.0000000 1.0000000 0.9990805 1.0000000 0.7610973
## AccuracyPValue McnemarPValue
## 0.0000000 NaN
## Warning in mypredict_mdl(mdl, df = fit_df, rsp_var, rsp_var_out,
## model_id_method, : Expecting 1 metric: Accuracy; recd: Accuracy, Kappa;
## retaining Accuracy only
## model_id model_method
## 1 Final.rf rf
## feats
## 1 T.thank, T.X2000, T.money, T.life, T.click, T.ect, T.X2001, T.research, T.remov, T.offer, T.softwar, T.pleas, T.attach, T.onlin, T.custom, T.subject, T.account, T.model, T.websit, T.will, T.num.chars, T.num.chars.log, T.num.words.log, T.num.words, T.com, T.num.words.unq, T.num.words.unq.log, T.regard, T.energi, .rnorm, T.meet, T.compani, T.net, T.messag, T.hou, T.http, T.houston, T.mail, T.free, T.request, T.has.http, T.now, T.email, T.number, T.date, T.invest, T.resourc, T.site, T.group, T.edu, T.hello, T.forward, T.secur, T.trade, T.inform, T.fax, T.one, T.X000, T.receiv, T.rate, T.product, T.hope, T.follow, T.review, T.power, T.price, T.without, T.link, T.home, T.corp, T.like, T.univers, T.address, T.send, T.option, T.discuss, T.can, T.manag, T.let, T.thing, T.risk, T.get, T.may, T.better, T.visit, T.www, T.list, T.info, T.full, T.repli, T.question, T.schedul, T.continu, T.creat, T.offic, T.file, T.john, T.year, T.doc, T.work, T.web, T.line, T.increas, T.call, T.best, T.wish, T.also, T.differ, T.use, T.happi, T.know, T.want, T.issu, T.set, T.week, T.busi, T.suggest, T.state, T.applic, T.help, T.return, T.talk, T.confer, T.updat, T.still, T.gas, T.buy, T.just, T.look, T.per, T.dear, T.tri, T.approv, T.new, T.good, T.deriv, T.order, T.time, T.day, T.within, T.much, T.market, T.internet, T.deal, T.program, T.right, T.morn, T.effect, T.monday, T.feel, T.contact, T.area, T.don, T.director, T.resum, T.special, T.person, T.depart, T.intern, T.provid, T.origin, T.futur, T.success, T.keep, T.last, T.well, T.credit, T.contract, T.phone, T.servic, T.sorri, T.interest, T.allow, T.avail, T.result, T.financ, T.case, T.chang, T.check, T.anoth, T.report, T.make, T.shall, T.find, T.book, T.need, T.month, T.high, T.version, T.take, T.come, T.hour, T.system, T.write, T.relat, T.form, T.name, T.addit, T.access, T.way, T.confirm, T.abl, T.assist, T.project, T.immedi, T.next., T.long, T.import, T.friday, T.posit, T.opportun, T.etc, T.either, T.give, T.great, T.analysi, T.cours, 
T.corpor, T.place, T.copi, T.today, T.mark, T.includ, T.process, T.financi, T.see, T.associ, T.specif, T.april, T.mani, T.particip, T.two, T.develop, T.sent, T.final, T.ask, T.base, T.engin, T.first, T.sure, T.present, T.even, T.read, T.type, T.possibl, T.real, T.data, T.locat, T.requir, T.think, T.peopl, T.interview, T.london, T.sinc, T.cost, T.run, T.howev, T.valu, T.support, T.industri, T.detail, T.recent, T.direct, T.design, T.respond, T.put, T.soon, T.join, T.team, T.member, T.start, T.made, T.short, T.oper, T.public, T.student, T.idea, T.current, T.done, T.alreadi, T.mention, T.expect, T.event, T.respons, T.attend, T.believ, T.end, T.part, T.problem, T.lot, T.tuesday, T.unit, T.say, T.complet, T.invit, T.sever, T.thought, T.note, T.involv, T.plan, T.back, T.juli, T.communic, T.realli, T.term, T.might, T.thursday, T.point, T.appreci, T.bring, T.begin, T.hear, T.comment, T.mean, T.open, T.wednesday, T.given, T.robert, T.experi, T.move, T.sincer, T.understand, T.togeth, T.effort, T.arrang, T.school, T.due
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 232.318 115.562
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.6 1 0.9600998
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9990805 1 0.8878712
# Log the end of the final-model fit: same major step as the last log entry,
# minor step advanced by one, stamped with the elapsed time since
# glb_script_tm. The "elapsed" element keeps its name so the new row is
# labelled the same way as the earlier log rows.
glb_script_df <- rbind(
  glb_script_df,
  data.frame(
    chunk_label = "fit.data.training.all",
    chunk_step_major = tail(glb_script_df[["chunk_step_major"]], n = 1),
    chunk_step_minor = tail(glb_script_df[["chunk_step_minor"]], n = 1) + 1,
    elapsed = (proc.time() - glb_script_tm)["elapsed"]
  )
)
# Show the two most recent log entries.
print(tail(glb_script_df, n = 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed11 fit.data.training.all 6 0 1318.755
## elapsed12 fit.data.training.all 6 1 1556.761
# Suffix the prediction-column name with the name of the last model in
# glb_models_lst. Re-running this line appends the suffix again.
glb_rsp_var_out <- paste0(glb_rsp_var_out,
                          names(glb_models_lst)[length(glb_models_lst)])
# Used again in predict.data.new chunk
# Add the final model's predictions to a data frame.
#
# Reads these globals: glb_is_regression, glb_is_classification,
# glb_is_binomial, glb_fin_mdl, glb_fin_mdl_id, glb_sel_mdl_id, glb_models_df,
# glb_rsp_var and glb_rsp_var_out.
#
# Args:
#   df: data frame to score; must contain the column named by glb_rsp_var.
#
# Returns:
#   df with added columns:
#     * regression: <glb_rsp_var_out> and <glb_rsp_var_out>.err (absolute
#       error); a scatter plot and the largest-error rows are also printed.
#     * binomial classification: <glb_rsp_var_out>.prob (second column of the
#       model's class probabilities) and <glb_rsp_var_out> (factor obtained by
#       thresholding that probability).
#     * other classification: <glb_rsp_var_out> (raw predicted class).
#
# Raises:
#   An error when glb_models_df does not hold exactly one
#   opt.prob.threshold.OOB value for glb_sel_mdl_id (binomial case).
glb_get_predictions <- function(df) {
  if (glb_is_regression) {
    df[, glb_rsp_var_out] <- predict(glb_fin_mdl, newdata=df, type="raw")
    print(myplot_scatter(df, glb_rsp_var, glb_rsp_var_out,
                         smooth=TRUE))
    df[, paste0(glb_rsp_var_out, ".err")] <-
      abs(df[, glb_rsp_var_out] - df[, glb_rsp_var])
    # Rows with the largest absolute error first
    print(head(orderBy(reformulate(c("-", paste0(glb_rsp_var_out, ".err"))),
                       df)))
  }

  if (glb_is_classification && glb_is_binomial) {
    # incorporate glb_clf_proba_threshold
    #   shd it only be for glb_fin_mdl or for earlier models ?
    #   for glb_trnent_df it shd opt; vs. assume for glb_newent_df
    finmdl_prob_fit <- glb_models_df[glb_models_df$model_id == glb_fin_mdl_id,
                                     "opt.prob.threshold.fit"]
    selmdl_prob_fit <- glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                                     "opt.prob.threshold.fit"]

    # The comparison is informational only. Run it only when both lookups
    # returned one non-missing value; a bare `!=` inside `if` aborts on
    # zero-length or NA operands. all.equal avoids exact float comparison.
    both_known <- length(finmdl_prob_fit) == 1 &&
      length(selmdl_prob_fit) == 1 &&
      !is.na(finmdl_prob_fit) && !is.na(selmdl_prob_fit)
    if (both_known && !isTRUE(all.equal(finmdl_prob_fit, selmdl_prob_fit))) {
      warning("opt.prob.threshold.fit differs for fin_mdl: ", finmdl_prob_fit,
              " vs. sel_mdl: ", selmdl_prob_fit)
    }

    # The cut-off applied is the selected model's OOB-optimal threshold.
    prob_threshold <- glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                                    "opt.prob.threshold.OOB"]
    if (length(prob_threshold) != 1) {
      stop("expected exactly one opt.prob.threshold.OOB for model ",
           glb_sel_mdl_id, "; found ", length(prob_threshold), call.=FALSE)
    }
    if (is.na(prob_threshold)) {
      warning("opt.prob.threshold.OOB is NA for model ", glb_sel_mdl_id,
              "; predicted classes will be NA")
    }

    prob_col <- paste0(glb_rsp_var_out, ".prob")
    # Column 2 is taken as the probability of the second response level
    df[, prob_col] <- predict(glb_fin_mdl, newdata=df, type="prob")[, 2]

    # FALSE/TRUE -> index 1/2 into the response factor's levels
    rsp_levels <- levels(df[, glb_rsp_var])
    df[, glb_rsp_var_out] <-
      factor(rsp_levels[(df[, prob_col] >= prob_threshold) * 1 + 1],
             rsp_levels)

    # prediction stats already reported by myfit_mdl ???
  }

  if (glb_is_classification && !glb_is_binomial) {
    df[, glb_rsp_var_out] <- predict(glb_fin_mdl, newdata=df, type="raw")
  }

  df
}
# Score the training entities with the final model, then merge the final
# model's feature importance into the feature table and display it.
glb_trnent_df <- glb_get_predictions(df = glb_trnent_df)
glb_feats_df <- mymerge_feats_importance(
  feats_df = glb_feats_df,
  sel_mdl = glb_fin_mdl,
  entity_df = glb_trnent_df
)
print(glb_feats_df)
## id cor.y exclude.as.feat cor.y.abs
## 297 T.thank -0.2949866270 0 0.2949866270
## 335 T.X2000 -0.2329015803 0 0.2329015803
## 183 T.money 0.1950121271 0 0.1950121271
## 157 T.life 0.2542534429 0 0.2542534429
## 40 T.click 0.2657893248 0 0.2657893248
## 80 T.ect -0.1787183496 0 0.1787183496
## 336 T.X2001 -0.1765484637 0 0.1765484637
## 249 T.research -0.1841301335 0 0.1841301335
## 244 T.remov 0.2431172421 0 0.2431172421
## 202 T.offer 0.1487070000 0 0.1487070000
## 220 T.pleas -0.1909556548 0 0.1909556548
## 277 T.softwar 0.1612232752 0 0.1612232752
## 22 T.attach -0.1712313186 0 0.1712313186
## 205 T.onlin 0.1733740078 0 0.1733740078
## 287 T.subject -0.2742522973 0 0.2742522973
## 327 T.will -0.0665079616 0 0.0665079616
## 61 T.custom 0.1164366855 0 0.1164366855
## 5 T.account 0.1313479737 0 0.1313479737
## 323 T.websit 0.2205915225 0 0.2205915225
## 181 T.model -0.1247536864 0 0.1247536864
## 196 T.num.chars.log -0.1455685879 0 0.1455685879
## 195 T.num.chars -0.0524196291 0 0.0524196291
## 197 T.num.words -0.0527500900 0 0.0527500900
## 198 T.num.words.log -0.1440768533 0 0.1440768533
## 41 T.com -0.0433966800 0 0.0433966800
## 200 T.num.words.unq.log -0.0889207444 0 0.0889207444
## 199 T.num.words.unq -0.0345410286 0 0.0345410286
## 242 T.regard -0.1784745678 0 0.1784745678
## 1 .rnorm -0.0149916451 0 0.0149916451
## 87 T.energi -0.1044570231 0 0.1044570231
## 176 T.meet -0.1411273291 0 0.1411273291
## 128 T.houston -0.1293812505 0 0.1293812505
## 190 T.net 0.1258019309 0 0.1258019309
## 126 T.hou -0.1588378340 0 0.1588378340
## 179 T.messag -0.0005575180 0 0.0005575180
## 130 T.http 0.0800627311 0 0.0800627311
## 168 T.mail 0.1085842197 0 0.1085842197
## 45 T.compani 0.1240836965 0 0.1240836965
## 106 T.free 0.1417992288 0 0.1417992288
## 247 T.request -0.0858771771 0 0.0858771771
## 119 T.has.http 0.1546324286 0 0.1546324286
## 194 T.now 0.1759665053 0 0.1759665053
## 85 T.email 0.1023923165 0 0.1023923165
## 201 T.number -0.0234857598 0 0.0234857598
## 276 T.site 0.1014465328 0 0.1014465328
## 250 T.resourc -0.0292356177 0 0.0292356177
## 63 T.date -0.0812755078 0 0.0812755078
## 105 T.forward -0.1933717118 0 0.1933717118
## 117 T.group -0.1533114490 0 0.1533114490
## 81 T.edu -0.0982918252 0 0.0982918252
## 121 T.hello 0.0233464644 0 0.0233464644
## 143 T.invest 0.1563160859 0 0.1563160859
## 138 T.inform 0.0666736466 0 0.0666736466
## 95 T.fax -0.0670894817 0 0.0670894817
## 264 T.secur 0.1548974786 0 0.1548974786
## 224 T.power -0.0535301754 0 0.0535301754
## 229 T.product 0.0715115284 0 0.0715115284
## 256 T.review -0.0367880188 0 0.0367880188
## 240 T.receiv 0.1687818268 0 0.1687818268
## 204 T.one 0.0506678105 0 0.0506678105
## 305 T.trade -0.0467818091 0 0.0467818091
## 330 T.without 0.1559808594 0 0.1559808594
## 125 T.hope -0.1662362379 0 0.1662362379
## 226 T.price -0.0351068190 0 0.0351068190
## 236 T.rate 0.0107974523 0 0.0107974523
## 103 T.follow -0.0566942671 0 0.0566942671
## 160 T.link 0.0801420776 0 0.0801420776
## 209 T.option -0.0947335835 0 0.0947335835
## 312 T.univers -0.1212534115 0 0.1212534115
## 124 T.home 0.0973272169 0 0.0973272169
## 7 T.address 0.0996708734 0 0.0996708734
## 75 T.discuss -0.1535472399 0 0.1535472399
## 334 T.X000 0.0940159920 0 0.0940159920
## 158 T.like -0.0887544511 0 0.0887544511
## 53 T.corp -0.1147582983 0 0.1147582983
## 235 T.question -0.1341087114 0 0.1341087114
## 298 T.thing 0.0676896421 0 0.0676896421
## 170 T.manag -0.1036145841 0 0.1036145841
## 258 T.risk -0.0881542117 0 0.0881542117
## 266 T.send 0.0376573942 0 0.0376573942
## 161 T.list 0.0912739361 0 0.0912739361
## 318 T.visit -0.0286237098 0 0.0286237098
## 156 T.let -0.1984382892 0 0.1984382892
## 339 T.year -0.0430454013 0 0.0430454013
## 245 T.repli 0.0700943046 0 0.0700943046
## 333 T.www 0.0399480693 0 0.0399480693
## 111 T.get 0.0457042259 0 0.0457042259
## 36 T.can -0.0390445514 0 0.0390445514
## 262 T.schedul -0.1465912289 0 0.1465912289
## 30 T.better -0.0191663387 0 0.0191663387
## 203 T.offic -0.0638067201 0 0.0638067201
## 35 T.call -0.0943589896 0 0.0943589896
## 329 T.within 0.1374174643 0 0.1374174643
## 150 T.just 0.1252238480 0 0.1252238480
## 137 T.info 0.0663165694 0 0.0663165694
## 174 T.may -0.0209343125 0 0.0209343125
## 147 T.john -0.1140403295 0 0.1140403295
## 314 T.use -0.0063128080 0 0.0063128080
## 97 T.file -0.0192972102 0 0.0192972102
## 29 T.best 0.0398959074 0 0.0398959074
## 57 T.creat 0.0301405208 0 0.0301405208
## 255 T.return 0.0478946173 0 0.0478946173
## 118 T.happi -0.0797444804 0 0.0797444804
## 76 T.doc -0.1083611032 0 0.1083611032
## 50 T.continu -0.0383716340 0 0.0383716340
## 320 T.want 0.0323104205 0 0.0323104205
## 325 T.week -0.0916969807 0 0.0916969807
## 215 T.per 0.0492077392 0 0.0492077392
## 146 T.issu -0.0742893516 0 0.0742893516
## 108 T.full 0.0490145812 0 0.0490145812
## 313 T.updat -0.0496413250 0 0.0496413250
## 280 T.special 0.1448517517 0 0.1448517517
## 283 T.state 0.0113101183 0 0.0113101183
## 289 T.suggest -0.1094172581 0 0.1094172581
## 294 T.talk -0.1416300933 0 0.1416300933
## 159 T.line 0.0679815330 0 0.0679815330
## 322 T.web 0.0577720345 0 0.0577720345
## 173 T.market -0.0058950194 0 0.0058950194
## 269 T.set -0.0828285961 0 0.0828285961
## 328 T.wish 0.1186807642 0 0.1186807642
## 135 T.increas 0.0309706690 0 0.0309706690
## 331 T.work -0.0682900073 0 0.0682900073
## 210 T.order 0.0833470697 0 0.0833470697
## 15 T.approv -0.0307077438 0 0.0307077438
## 306 T.tri -0.0038171490 0 0.0038171490
## 33 T.busi 0.1095075205 0 0.1095075205
## 110 T.gas -0.0495248812 0 0.0495248812
## 49 T.contact -0.0699343534 0 0.0699343534
## 99 T.financ -0.1045890279 0 0.1045890279
## 66 T.dear -0.0965980343 0 0.0965980343
## 10 T.also -0.0820622159 0 0.0820622159
## 13 T.applic 0.0050352482 0 0.0050352482
## 257 T.right 0.1060564074 0 0.1060564074
## 64 T.day 0.0007891443 0 0.0007891443
## 182 T.monday -0.1194585534 0 0.1194585534
## 72 T.differ -0.0243752924 0 0.0243752924
## 165 T.look -0.0309057968 0 0.0309057968
## 47 T.confer -0.0954039390 0 0.0954039390
## 122 T.help -0.0597389247 0 0.0597389247
## 284 T.still -0.0663399134 0 0.0663399134
## 191 T.new -0.0132368059 0 0.0132368059
## 169 T.make 0.0631366852 0 0.0631366852
## 154 T.know -0.1658411125 0 0.1658411125
## 232 T.provid 0.0569426417 0 0.0569426417
## 185 T.morn -0.1164137393 0 0.1164137393
## 39 T.check 0.0279556976 0 0.0279556976
## 293 T.take -0.0023729634 0 0.0023729634
## 230 T.program 0.0175949191 0 0.0175949191
## 254 T.resum -0.1121691760 0 0.1121691760
## 34 T.buy 0.0808969779 0 0.0808969779
## 82 T.effect 0.0935998097 0 0.0935998097
## 67 T.depart -0.0904778162 0 0.0904778162
## 246 T.report 0.0623679487 0 0.0623679487
## 187 T.much -0.0103939917 0 0.0103939917
## 309 T.type 0.0109271706 0 0.0109271706
## 217 T.phone -0.0584657986 0 0.0584657986
## 155 T.last -0.0846935116 0 0.0846935116
## 216 T.person 0.0213046754 0 0.0213046754
## 141 T.internet 0.0963431994 0 0.0963431994
## 68 T.deriv -0.0786592130 0 0.0786592130
## 326 T.well -0.0523933425 0 0.0523933425
## 77 T.don 0.0428995876 0 0.0428995876
## 231 T.project -0.0432261317 0 0.0432261317
## 271 T.shall -0.1226099197 0 0.1226099197
## 37 T.case -0.0487433346 0 0.0487433346
## 302 T.time -0.0431450343 0 0.0431450343
## 109 T.futur 0.1027319286 0 0.1027319286
## 189 T.need -0.0523199878 0 0.0523199878
## 211 T.origin -0.0692795506 0 0.0692795506
## 268 T.servic 0.0337403188 0 0.0337403188
## 71 T.develop -0.0720388395 0 0.0720388395
## 292 T.system 0.0324103996 0 0.0324103996
## 123 T.high 0.0464722890 0 0.0464722890
## 139 T.interest -0.0169701603 0 0.0169701603
## 96 T.feel -0.0474010255 0 0.0474010255
## 58 T.credit -0.0287077376 0 0.0287077376
## 65 T.deal -0.0343820752 0 0.0343820752
## 101 T.find -0.0121708040 0 0.0121708040
## 288 T.success 0.0850351303 0 0.0850351303
## 152 T.keep 0.0213379127 0 0.0213379127
## 38 T.chang -0.0200456328 0 0.0200456328
## 74 T.director -0.0597083651 0 0.0597083651
## 8 T.allow -0.0030796408 0 0.0030796408
## 192 T.next. -0.0702153197 0 0.0702153197
## 115 T.good -0.0272279489 0 0.0272279489
## 127 T.hour 0.0281047902 0 0.0281047902
## 31 T.book -0.0625010987 0 0.0625010987
## 279 T.sorri -0.0362540720 0 0.0362540720
## 16 T.april -0.0876138572 0 0.0876138572
## 54 T.corpor 0.0280793191 0 0.0280793191
## 17 T.area -0.0340797023 0 0.0340797023
## 42 T.come -0.0410008395 0 0.0410008395
## 253 T.result 0.0830096996 0 0.0830096996
## 20 T.assist -0.0006189100 0 0.0006189100
## 90 T.etc -0.0652946470 0 0.0652946470
## 316 T.version 0.0472326491 0 0.0472326491
## 140 T.intern -0.0232839939 0 0.0232839939
## 132 T.immedi 0.0636714595 0 0.0636714595
## 6 T.addit -0.0437829267 0 0.0437829267
## 98 T.final -0.0520091641 0 0.0520091641
## 308 T.two -0.0598421384 0 0.0598421384
## 228 T.process -0.0461567802 0 0.0461567802
## 243 T.relat -0.0288844112 0 0.0288844112
## 48 T.confirm -0.0664106329 0 0.0664106329
## 91 T.even 0.0375582942 0 0.0375582942
## 299 T.think -0.0906628209 0 0.0906628209
## 100 T.financi 0.0093179100 0 0.0093179100
## 24 T.avail 0.0203738484 0 0.0203738484
## 332 T.write 0.0163589185 0 0.0163589185
## 19 T.ask -0.1036818687 0 0.1036818687
## 116 T.great -0.0236211947 0 0.0236211947
## 113 T.give -0.0534532759 0 0.0534532759
## 172 T.mark -0.0837776089 0 0.0837776089
## 3 T.abl -0.0579660702 0 0.0579660702
## 51 T.contract -0.0405180394 0 0.0405180394
## 164 T.long -0.0249203970 0 0.0249203970
## 291 T.sure -0.0124904057 0 0.0124904057
## 188 T.name 0.0383030252 0 0.0383030252
## 218 T.place 0.0162785786 0 0.0162785786
## 213 T.particip -0.0649046742 0 0.0649046742
## 56 T.cours -0.0391356944 0 0.0391356944
## 278 T.soon -0.0838069893 0 0.0838069893
## 184 T.month 0.0181070117 0 0.0181070117
## 237 T.read 0.0528329213 0 0.0528329213
## 4 T.access -0.0155755615 0 0.0155755615
## 52 T.copi -0.0643112761 0 0.0643112761
## 267 T.sent -0.0746954965 0 0.0746954965
## 26 T.base 0.0062517784 0 0.0062517784
## 321 T.way 0.0626661827 0 0.0626661827
## 142 T.interview -0.1231062091 0 0.1231062091
## 21 T.associ -0.0644852670 0 0.0644852670
## 12 T.anoth -0.0378074765 0 0.0378074765
## 177 T.member -0.0224540197 0 0.0224540197
## 62 T.data -0.0738487730 0 0.0738487730
## 222 T.posit -0.0360326427 0 0.0360326427
## 102 T.first -0.0077751788 0 0.0077751788
## 238 T.real 0.0176738908 0 0.0176738908
## 104 T.form 0.0338707071 0 0.0338707071
## 88 T.engin 0.0253306210 0 0.0253306210
## 107 T.friday -0.1120976417 0 0.1120976417
## 274 T.sinc -0.0396341699 0 0.0396341699
## 281 T.specif -0.0246339979 0 0.0246339979
## 133 T.import 0.0145290721 0 0.0145290721
## 163 T.london -0.0927913079 0 0.0927913079
## 303 T.today 0.0394257429 0 0.0394257429
## 84 T.either -0.0499022910 0 0.0499022910
## 171 T.mani 0.0362883851 0 0.0362883851
## 223 T.possibl -0.0915695518 0 0.0915695518
## 265 T.see -0.0196578866 0 0.0196578866
## 129 T.howev -0.0712701234 0 0.0712701234
## 225 T.present -0.0794303423 0 0.0794303423
## 208 T.opportun -0.0307220593 0 0.0307220593
## 93 T.expect 0.0497864062 0 0.0497864062
## 136 T.industri 0.0086920687 0 0.0086920687
## 233 T.public -0.0123208718 0 0.0123208718
## 315 T.valu -0.0130686578 0 0.0130686578
## 248 T.requir 0.0004346765 0 0.0004346765
## 273 T.short -0.0169549005 0 0.0169549005
## 234 T.put -0.0359657007 0 0.0359657007
## 260 T.run -0.0669971544 0 0.0669971544
## 55 T.cost -0.0094544027 0 0.0094544027
## 11 T.analysi -0.0910457873 0 0.0910457873
## 311 T.unit -0.0183074565 0 0.0183074565
## 46 T.complet 0.0127091694 0 0.0127091694
## 73 T.direct -0.0576243342 0 0.0576243342
## 134 T.includ 0.0091397492 0 0.0091397492
## 167 T.made 0.0873106432 0 0.0873106432
## 178 T.mention -0.0720983856 0 0.0720983856
## 149 T.juli -0.0541972695 0 0.0541972695
## 252 T.respons -0.0410279129 0 0.0410279129
## 69 T.design 0.0766707755 0 0.0766707755
## 193 T.note -0.0619902255 0 0.0619902255
## 23 T.attend -0.1144674018 0 0.1144674018
## 241 T.recent -0.0199602773 0 0.0199602773
## 25 T.back -0.0569490315 0 0.0569490315
## 212 T.part -0.0184082049 0 0.0184082049
## 148 T.join -0.0533575547 0 0.0533575547
## 9 T.alreadi -0.0250216840 0 0.0250216840
## 227 T.problem -0.0697427037 0 0.0697427037
## 28 T.believ 0.0577847153 0 0.0577847153
## 290 T.support -0.0530429277 0 0.0530429277
## 214 T.peopl 0.0739160630 0 0.0739160630
## 78 T.done 0.0108049574 0 0.0108049574
## 282 T.start 0.0616916942 0 0.0616916942
## 70 T.detail -0.0444871076 0 0.0444871076
## 295 T.team -0.0779983734 0 0.0779983734
## 92 T.event -0.0215873205 0 0.0215873205
## 60 T.current -0.0342745848 0 0.0342745848
## 296 T.term -0.0157925005 0 0.0157925005
## 144 T.invit -0.0890043574 0 0.0890043574
## 270 T.sever 0.0065969188 0 0.0065969188
## 162 T.locat -0.0377448379 0 0.0377448379
## 219 T.plan -0.0215226695 0 0.0215226695
## 251 T.respond -0.0493495900 0 0.0493495900
## 275 T.sincer -0.0414940836 0 0.0414940836
## 131 T.idea -0.0471329213 0 0.0471329213
## 207 T.oper -0.0074123474 0 0.0074123474
## 221 T.point -0.0602478720 0 0.0602478720
## 261 T.say 0.0262260668 0 0.0262260668
## 44 T.communic -0.0500034930 0 0.0500034930
## 166 T.lot 0.0081198806 0 0.0081198806
## 286 T.student -0.0933439568 0 0.0933439568
## 175 T.mean 0.0535012931 0 0.0535012931
## 307 T.tuesday -0.0830155913 0 0.0830155913
## 86 T.end -0.0420623769 0 0.0420623769
## 239 T.realli 0.0019275089 0 0.0019275089
## 180 T.might -0.0460962123 0 0.0460962123
## 145 T.involv 0.0347124963 0 0.0347124963
## 259 T.robert -0.0928419819 0 0.0928419819
## 300 T.thought -0.0624397814 0 0.0624397814
## 94 T.experi -0.0350513391 0 0.0350513391
## 79 T.due 0.0034232838 0 0.0034232838
## 114 T.given -0.0346627918 0 0.0346627918
## 206 T.open -0.0465715601 0 0.0465715601
## 43 T.comment -0.0642596100 0 0.0642596100
## 32 T.bring -0.0331450379 0 0.0331450379
## 27 T.begin -0.0771229704 0 0.0771229704
## 310 T.understand -0.0710122486 0 0.0710122486
## 324 T.wednesday -0.1010425980 0 0.1010425980
## 186 T.move -0.0359061954 0 0.0359061954
## 263 T.school -0.0924453207 0 0.0924453207
## 304 T.togeth -0.0761858529 0 0.0761858529
## 120 T.hear -0.0642102696 0 0.0642102696
## 18 T.arrang -0.1018207213 0 0.1018207213
## 14 T.appreci -0.1221464744 0 0.1221464744
## 301 T.thursday -0.1127452699 0 0.1127452699
## 83 T.effort -0.0034438302 0 0.0034438302
## 2 spam 1.0000000000 1 1.0000000000
## 59 T.crenshaw -0.1311650803 0 0.1311650803
## 89 T.enron -0.1864662681 0 0.1864662681
## 112 T.gibner -0.1296610131 0 0.1296610131
## 151 T.kaminski -0.2650520320 0 0.2650520320
## 153 T.kevin -0.0927983452 0 0.0927983452
## 272 T.shirley -0.1510181510 0 0.1510181510
## 285 T.stinson -0.1414714170 0 0.1414714170
## 317 T.vinc -0.3181138032 0 0.3181138032
## 319 T.vkamin -0.1235350771 0 0.1235350771
## 337 T.X713 -0.1495101398 0 0.1495101398
## 338 T.X853 -0.1140335522 0 0.1140335522
## cor.high.X is.ConditionalX.y is.cor.y.abs.low importance
## 297 <NA> TRUE FALSE 1.000000e+02
## 335 <NA> TRUE FALSE 6.608736e+01
## 183 <NA> TRUE FALSE 6.172287e+01
## 157 <NA> TRUE FALSE 5.265926e+01
## 40 <NA> TRUE FALSE 4.461234e+01
## 80 T.hou TRUE FALSE 2.938385e+01
## 336 <NA> TRUE FALSE 2.745813e+01
## 249 <NA> TRUE FALSE 2.507645e+01
## 244 <NA> TRUE FALSE 2.280910e+01
## 202 <NA> TRUE FALSE 2.237556e+01
## 220 <NA> TRUE FALSE 1.432128e+01
## 277 <NA> TRUE FALSE 1.322665e+01
## 22 <NA> TRUE FALSE 1.290745e+01
## 205 <NA> TRUE FALSE 1.247105e+01
## 287 <NA> TRUE FALSE 1.233926e+01
## 327 <NA> TRUE FALSE 1.179984e+01
## 61 <NA> TRUE FALSE 1.176533e+01
## 5 <NA> TRUE FALSE 1.095184e+01
## 323 <NA> TRUE FALSE 1.070618e+01
## 181 <NA> TRUE FALSE 1.028838e+01
## 196 T.num.words TRUE FALSE 1.003590e+01
## 195 <NA> TRUE FALSE 9.793106e+00
## 197 T.num.words.unq TRUE FALSE 8.262576e+00
## 198 <NA> TRUE FALSE 8.242023e+00
## 41 <NA> TRUE FALSE 7.643294e+00
## 200 <NA> TRUE FALSE 7.594186e+00
## 199 <NA> TRUE FALSE 7.573642e+00
## 242 <NA> TRUE FALSE 7.381999e+00
## 1 <NA> TRUE FALSE 7.241541e+00
## 87 <NA> TRUE FALSE 7.127091e+00
## 176 <NA> TRUE FALSE 6.998823e+00
## 128 <NA> TRUE FALSE 6.400484e+00
## 190 <NA> TRUE FALSE 6.206724e+00
## 126 <NA> TRUE FALSE 6.176825e+00
## 179 <NA> TRUE TRUE 5.990688e+00
## 130 T.www TRUE FALSE 5.863301e+00
## 168 T.report TRUE FALSE 5.636595e+00
## 45 <NA> TRUE FALSE 5.106079e+00
## 106 <NA> TRUE FALSE 4.881867e+00
## 247 <NA> TRUE FALSE 4.527969e+00
## 119 <NA> TRUE FALSE 4.304191e+00
## 194 <NA> TRUE FALSE 4.191452e+00
## 85 <NA> TRUE FALSE 3.688310e+00
## 201 <NA> TRUE FALSE 3.610993e+00
## 276 <NA> TRUE FALSE 3.404694e+00
## 250 <NA> TRUE FALSE 3.387713e+00
## 63 <NA> TRUE FALSE 3.316677e+00
## 105 <NA> TRUE FALSE 3.164263e+00
## 117 <NA> TRUE FALSE 3.104290e+00
## 81 <NA> TRUE FALSE 3.067553e+00
## 121 <NA> TRUE FALSE 3.023926e+00
## 143 <NA> TRUE FALSE 2.691366e+00
## 138 <NA> TRUE FALSE 2.583976e+00
## 95 <NA> TRUE FALSE 2.535793e+00
## 264 <NA> TRUE FALSE 2.488329e+00
## 224 <NA> TRUE FALSE 2.446926e+00
## 229 <NA> TRUE FALSE 2.388973e+00
## 256 <NA> TRUE FALSE 2.373109e+00
## 240 <NA> TRUE FALSE 2.349556e+00
## 204 <NA> TRUE FALSE 2.335957e+00
## 305 <NA> TRUE FALSE 2.276161e+00
## 330 <NA> TRUE FALSE 2.269799e+00
## 125 <NA> TRUE FALSE 2.259180e+00
## 226 <NA> TRUE FALSE 2.164863e+00
## 236 <NA> TRUE TRUE 2.146874e+00
## 103 <NA> TRUE FALSE 1.962448e+00
## 160 <NA> TRUE FALSE 1.864544e+00
## 209 <NA> TRUE FALSE 1.834668e+00
## 312 <NA> TRUE FALSE 1.797042e+00
## 124 <NA> TRUE FALSE 1.772391e+00
## 7 <NA> TRUE FALSE 1.762137e+00
## 75 <NA> TRUE FALSE 1.685287e+00
## 334 <NA> TRUE FALSE 1.660041e+00
## 158 <NA> TRUE FALSE 1.646162e+00
## 53 <NA> TRUE FALSE 1.568135e+00
## 235 <NA> TRUE FALSE 1.527517e+00
## 298 <NA> TRUE FALSE 1.525203e+00
## 170 <NA> TRUE FALSE 1.519744e+00
## 258 <NA> TRUE FALSE 1.517986e+00
## 266 <NA> TRUE FALSE 1.439221e+00
## 161 <NA> TRUE FALSE 1.401317e+00
## 318 <NA> TRUE FALSE 1.370098e+00
## 156 T.know TRUE FALSE 1.368318e+00
## 339 <NA> TRUE FALSE 1.362842e+00
## 245 <NA> TRUE FALSE 1.360943e+00
## 333 <NA> TRUE FALSE 1.350511e+00
## 111 <NA> TRUE FALSE 1.333846e+00
## 36 <NA> TRUE FALSE 1.323737e+00
## 262 <NA> TRUE FALSE 1.320086e+00
## 30 <NA> TRUE FALSE 1.282272e+00
## 203 <NA> TRUE FALSE 1.261170e+00
## 35 <NA> TRUE FALSE 1.228516e+00
## 329 <NA> TRUE FALSE 1.215391e+00
## 150 <NA> TRUE FALSE 1.193519e+00
## 137 <NA> TRUE FALSE 1.185042e+00
## 174 <NA> TRUE FALSE 1.183517e+00
## 147 <NA> TRUE FALSE 1.170712e+00
## 314 <NA> TRUE TRUE 1.157332e+00
## 97 <NA> TRUE FALSE 1.155430e+00
## 29 <NA> TRUE FALSE 1.148288e+00
## 57 <NA> TRUE FALSE 1.136862e+00
## 255 <NA> TRUE FALSE 1.133605e+00
## 118 <NA> TRUE FALSE 1.126790e+00
## 76 <NA> TRUE FALSE 1.120590e+00
## 50 <NA> TRUE FALSE 1.120356e+00
## 320 <NA> TRUE FALSE 1.092866e+00
## 325 <NA> TRUE FALSE 1.082508e+00
## 215 <NA> TRUE FALSE 1.071053e+00
## 146 <NA> TRUE FALSE 1.066592e+00
## 108 <NA> TRUE FALSE 1.063778e+00
## 313 <NA> TRUE FALSE 1.063617e+00
## 280 <NA> TRUE FALSE 1.061592e+00
## 283 <NA> TRUE TRUE 1.043095e+00
## 289 <NA> TRUE FALSE 1.039629e+00
## 294 <NA> TRUE FALSE 1.038376e+00
## 159 <NA> TRUE FALSE 1.031302e+00
## 322 <NA> TRUE FALSE 1.025635e+00
## 173 <NA> TRUE TRUE 1.014849e+00
## 269 <NA> TRUE FALSE 1.013547e+00
## 328 <NA> TRUE FALSE 9.922793e-01
## 135 <NA> TRUE FALSE 9.773019e-01
## 331 <NA> TRUE FALSE 9.708088e-01
## 210 <NA> TRUE FALSE 9.687514e-01
## 15 <NA> TRUE FALSE 9.449306e-01
## 306 <NA> TRUE TRUE 9.439910e-01
## 33 <NA> TRUE FALSE 9.393700e-01
## 110 <NA> TRUE FALSE 9.349747e-01
## 49 <NA> TRUE FALSE 8.961516e-01
## 99 <NA> TRUE FALSE 8.944129e-01
## 66 <NA> TRUE FALSE 8.853751e-01
## 10 <NA> TRUE FALSE 8.671355e-01
## 13 <NA> TRUE TRUE 8.551100e-01
## 257 <NA> TRUE FALSE 8.384531e-01
## 64 <NA> TRUE TRUE 8.270523e-01
## 182 <NA> TRUE FALSE 8.208056e-01
## 72 <NA> TRUE FALSE 8.150723e-01
## 165 <NA> TRUE FALSE 8.120383e-01
## 47 <NA> TRUE FALSE 8.086417e-01
## 122 <NA> TRUE FALSE 8.054626e-01
## 284 <NA> TRUE FALSE 8.008877e-01
## 191 <NA> TRUE TRUE 7.993651e-01
## 169 <NA> TRUE FALSE 7.911753e-01
## 154 <NA> TRUE FALSE 7.844348e-01
## 232 <NA> TRUE FALSE 7.792859e-01
## 185 <NA> TRUE FALSE 7.786581e-01
## 39 <NA> TRUE FALSE 7.695993e-01
## 293 <NA> TRUE TRUE 7.615075e-01
## 230 <NA> TRUE FALSE 7.613492e-01
## 254 <NA> TRUE FALSE 7.605789e-01
## 34 <NA> TRUE FALSE 7.512880e-01
## 82 <NA> TRUE FALSE 7.442511e-01
## 67 <NA> TRUE FALSE 7.301341e-01
## 246 <NA> TRUE FALSE 7.257000e-01
## 187 <NA> TRUE TRUE 7.133183e-01
## 309 <NA> TRUE TRUE 7.119680e-01
## 217 <NA> TRUE FALSE 7.034003e-01
## 155 <NA> TRUE FALSE 7.016744e-01
## 216 <NA> TRUE FALSE 6.995896e-01
## 141 <NA> TRUE FALSE 6.984188e-01
## 68 <NA> TRUE FALSE 6.955233e-01
## 326 <NA> TRUE FALSE 6.914875e-01
## 77 <NA> TRUE FALSE 6.866153e-01
## 231 <NA> TRUE FALSE 6.703914e-01
## 271 <NA> TRUE FALSE 6.656263e-01
## 37 <NA> TRUE FALSE 6.475730e-01
## 302 <NA> TRUE FALSE 6.275713e-01
## 109 <NA> TRUE FALSE 6.098268e-01
## 189 <NA> TRUE FALSE 6.073976e-01
## 211 <NA> TRUE FALSE 6.070179e-01
## 268 <NA> TRUE FALSE 6.067479e-01
## 71 <NA> TRUE FALSE 5.995271e-01
## 292 <NA> TRUE FALSE 5.920637e-01
## 123 <NA> TRUE FALSE 5.919667e-01
## 139 <NA> TRUE FALSE 5.913218e-01
## 96 <NA> TRUE FALSE 5.862760e-01
## 58 <NA> TRUE FALSE 5.846247e-01
## 65 <NA> TRUE FALSE 5.817794e-01
## 101 <NA> TRUE TRUE 5.755572e-01
## 288 <NA> TRUE FALSE 5.695214e-01
## 152 <NA> TRUE FALSE 5.694807e-01
## 38 <NA> TRUE FALSE 5.620778e-01
## 74 <NA> TRUE FALSE 5.601229e-01
## 8 <NA> TRUE TRUE 5.579741e-01
## 192 <NA> TRUE FALSE 5.555810e-01
## 115 <NA> TRUE FALSE 5.523759e-01
## 127 <NA> TRUE FALSE 5.353899e-01
## 31 <NA> TRUE FALSE 5.349219e-01
## 279 <NA> TRUE FALSE 5.301466e-01
## 16 <NA> TRUE FALSE 5.269599e-01
## 54 <NA> TRUE FALSE 5.195754e-01
## 17 <NA> TRUE FALSE 5.176768e-01
## 42 <NA> TRUE FALSE 5.118067e-01
## 253 <NA> TRUE FALSE 4.967109e-01
## 20 <NA> TRUE TRUE 4.946041e-01
## 90 <NA> TRUE FALSE 4.940605e-01
## 316 <NA> TRUE FALSE 4.919417e-01
## 140 <NA> TRUE FALSE 4.916993e-01
## 132 <NA> TRUE FALSE 4.912062e-01
## 6 <NA> TRUE FALSE 4.894067e-01
## 98 <NA> TRUE FALSE 4.872028e-01
## 308 <NA> TRUE FALSE 4.810742e-01
## 228 <NA> TRUE FALSE 4.774605e-01
## 243 <NA> TRUE FALSE 4.760294e-01
## 48 <NA> TRUE FALSE 4.746358e-01
## 91 <NA> TRUE FALSE 4.692611e-01
## 299 <NA> TRUE FALSE 4.665424e-01
## 100 <NA> TRUE TRUE 4.601548e-01
## 24 <NA> TRUE FALSE 4.556937e-01
## 332 <NA> TRUE FALSE 4.544250e-01
## 19 <NA> TRUE FALSE 4.493957e-01
## 116 <NA> TRUE FALSE 4.451296e-01
## 113 <NA> TRUE FALSE 4.445458e-01
## 172 <NA> TRUE FALSE 4.342788e-01
## 3 <NA> TRUE FALSE 4.339199e-01
## 51 <NA> TRUE FALSE 4.338389e-01
## 164 <NA> TRUE FALSE 4.337087e-01
## 291 <NA> TRUE TRUE 4.215818e-01
## 188 <NA> TRUE FALSE 4.209526e-01
## 218 <NA> TRUE FALSE 4.201328e-01
## 213 <NA> TRUE FALSE 4.181039e-01
## 56 <NA> TRUE FALSE 4.179719e-01
## 278 <NA> TRUE FALSE 4.161796e-01
## 184 <NA> TRUE FALSE 4.157089e-01
## 237 <NA> TRUE FALSE 4.099642e-01
## 4 <NA> TRUE FALSE 4.069955e-01
## 52 <NA> TRUE FALSE 4.013440e-01
## 267 <NA> TRUE FALSE 3.987235e-01
## 26 <NA> TRUE TRUE 3.976768e-01
## 321 <NA> TRUE FALSE 3.930502e-01
## 142 <NA> TRUE FALSE 3.897700e-01
## 21 <NA> TRUE FALSE 3.828195e-01
## 12 <NA> TRUE FALSE 3.800957e-01
## 177 <NA> TRUE FALSE 3.774933e-01
## 62 <NA> TRUE FALSE 3.772979e-01
## 222 <NA> TRUE FALSE 3.735132e-01
## 102 <NA> TRUE TRUE 3.717664e-01
## 238 <NA> TRUE FALSE 3.667750e-01
## 104 <NA> TRUE FALSE 3.627678e-01
## 88 <NA> TRUE FALSE 3.609385e-01
## 107 <NA> TRUE FALSE 3.591500e-01
## 274 <NA> TRUE FALSE 3.586978e-01
## 281 <NA> TRUE FALSE 3.559064e-01
## 133 <NA> TRUE TRUE 3.554649e-01
## 163 <NA> TRUE FALSE 3.525703e-01
## 303 <NA> TRUE FALSE 3.507746e-01
## 84 <NA> TRUE FALSE 3.476596e-01
## 171 <NA> TRUE FALSE 3.403453e-01
## 223 <NA> TRUE FALSE 3.395541e-01
## 265 <NA> TRUE FALSE 3.340267e-01
## 129 <NA> TRUE FALSE 3.339675e-01
## 225 <NA> TRUE FALSE 3.317994e-01
## 208 <NA> TRUE FALSE 3.306599e-01
## 93 <NA> TRUE FALSE 3.284891e-01
## 136 <NA> TRUE TRUE 3.262800e-01
## 233 <NA> TRUE TRUE 3.104077e-01
## 315 <NA> TRUE TRUE 3.004025e-01
## 248 <NA> TRUE TRUE 2.985339e-01
## 273 <NA> TRUE FALSE 2.872547e-01
## 234 <NA> TRUE FALSE 2.851651e-01
## 260 <NA> TRUE FALSE 2.833785e-01
## 55 <NA> TRUE TRUE 2.762603e-01
## 11 <NA> TRUE FALSE 2.751153e-01
## 311 <NA> TRUE FALSE 2.745307e-01
## 46 <NA> TRUE TRUE 2.725415e-01
## 73 <NA> TRUE FALSE 2.715341e-01
## 134 <NA> TRUE TRUE 2.708070e-01
## 167 <NA> TRUE FALSE 2.703998e-01
## 178 <NA> TRUE FALSE 2.669726e-01
## 149 <NA> TRUE FALSE 2.620121e-01
## 252 <NA> TRUE FALSE 2.593969e-01
## 69 <NA> TRUE FALSE 2.559516e-01
## 193 <NA> TRUE FALSE 2.491801e-01
## 23 <NA> TRUE FALSE 2.487229e-01
## 241 <NA> TRUE FALSE 2.417892e-01
## 25 <NA> TRUE FALSE 2.411626e-01
## 212 <NA> TRUE FALSE 2.342868e-01
## 148 <NA> TRUE FALSE 2.329263e-01
## 9 <NA> TRUE FALSE 2.319329e-01
## 227 <NA> TRUE FALSE 2.305511e-01
## 28 <NA> TRUE FALSE 2.287674e-01
## 290 <NA> TRUE FALSE 2.284017e-01
## 214 <NA> TRUE FALSE 2.266534e-01
## 78 <NA> TRUE TRUE 2.174878e-01
## 282 <NA> TRUE FALSE 2.148999e-01
## 70 <NA> TRUE FALSE 2.139473e-01
## 295 <NA> TRUE FALSE 2.119703e-01
## 92 <NA> TRUE FALSE 2.084525e-01
## 60 <NA> TRUE FALSE 2.059470e-01
## 296 <NA> TRUE FALSE 1.973741e-01
## 144 <NA> TRUE FALSE 1.928101e-01
## 270 <NA> TRUE TRUE 1.832000e-01
## 162 <NA> TRUE FALSE 1.746699e-01
## 219 <NA> TRUE FALSE 1.724437e-01
## 251 <NA> TRUE FALSE 1.693961e-01
## 275 <NA> TRUE FALSE 1.625820e-01
## 131 <NA> TRUE FALSE 1.497490e-01
## 207 <NA> TRUE TRUE 1.486825e-01
## 221 <NA> TRUE FALSE 1.456293e-01
## 261 <NA> TRUE FALSE 1.440520e-01
## 44 <NA> TRUE FALSE 1.421096e-01
## 166 <NA> TRUE TRUE 1.383660e-01
## 286 <NA> TRUE FALSE 1.345267e-01
## 175 <NA> TRUE FALSE 1.323780e-01
## 307 <NA> TRUE FALSE 1.308920e-01
## 86 <NA> TRUE FALSE 1.217581e-01
## 239 <NA> TRUE TRUE 1.138526e-01
## 180 <NA> TRUE FALSE 1.089084e-01
## 145 <NA> TRUE FALSE 1.062215e-01
## 259 <NA> TRUE FALSE 9.960552e-02
## 300 <NA> TRUE FALSE 9.375876e-02
## 94 <NA> TRUE FALSE 9.016114e-02
## 79 <NA> TRUE TRUE 8.586922e-02
## 114 <NA> TRUE FALSE 7.818477e-02
## 206 <NA> TRUE FALSE 7.627640e-02
## 43 <NA> TRUE FALSE 7.544228e-02
## 32 <NA> TRUE FALSE 6.425930e-02
## 27 <NA> TRUE FALSE 6.320448e-02
## 310 <NA> TRUE FALSE 5.878491e-02
## 324 <NA> TRUE FALSE 4.831499e-02
## 186 <NA> TRUE FALSE 4.620789e-02
## 263 <NA> TRUE FALSE 4.082092e-02
## 304 <NA> TRUE FALSE 3.908406e-02
## 120 <NA> TRUE FALSE 3.538827e-02
## 18 <NA> TRUE FALSE 2.050542e-02
## 14 <NA> TRUE FALSE 1.885191e-02
## 301 <NA> TRUE FALSE 6.993505e-03
## 83 <NA> TRUE TRUE 0.000000e+00
## 2 <NA> NA FALSE NA
## 59 <NA> FALSE FALSE NA
## 89 <NA> FALSE FALSE NA
## 112 <NA> FALSE FALSE NA
## 151 <NA> FALSE FALSE NA
## 153 <NA> FALSE FALSE NA
## 272 <NA> FALSE FALSE NA
## 285 <NA> FALSE FALSE NA
## 317 <NA> FALSE FALSE NA
## 319 <NA> FALSE FALSE NA
## 337 <NA> FALSE FALSE NA
## 338 <NA> FALSE FALSE NA
# Used again in predict.data.new chunk
# Print diagnostic plots comparing the actual response (glb_rsp_var) with the
# predicted response (glb_rsp_var_out) for the observations in obs_df.
#
# Args:
#   obs_df: data frame of observations; it must contain the columns named by
#           glb_rsp_var and glb_rsp_var_out plus the feature columns listed
#           in glb_feats_df$id (they are passed to melt() / the plot helpers).
#
# Reads the globals glb_feats_df, glb_rsp_var, glb_rsp_var_out, glb_id_vars,
# glb_is_regression and glb_is_classification. Called for its side effects
# (printed plots and warnings); the return value is not meaningful.
glb_analytics_diag_plots <- function(obs_df) {
  # Features with strictly positive importance; subset() also drops rows
  # whose importance is NA. Computed once and reused below.
  imp_feats_df <- subset(glb_feats_df, importance > 0)

  # Scatter plots are capped at the first five such features, in the row
  # order of glb_feats_df (no re-sorting is done here).
  feats <- imp_feats_df$id
  if (length(feats) > 5) {
    warning("Limiting important feature scatter plots to 5 out of ",
            length(feats))
    feats <- feats[1:5]
  }

  for (feat in feats) {
    # Long format: one row per (observation, actual-or-predicted response).
    plot_df <- melt(obs_df, id.vars = feat,
                    measure.vars = c(glb_rsp_var, glb_rsp_var_out))
    # if (feat == "<feat_name>") print(myplot_scatter(plot_df, feat, "value",
    #                                      facet_colcol_name="variable") +
    #     geom_vline(xintercept=<divider_val>, linetype="dotted")) else
    print(myplot_scatter(plot_df, feat, "value", colorcol_name = "variable",
                         facet_colcol_name = "variable", jitter = TRUE) +
            # "none" replaces the deprecated guides(color=FALSE) spelling.
            guides(color = "none"))
  }

  if (glb_is_regression) {
    # plot_vars_df <- subset(glb_feats_df, importance >
    #                  glb_feats_df[glb_feats_df$id == ".rnorm", "importance"])
    # All features, most important first (no importance > 0 filter here).
    plot_vars_df <- orderBy(~ -importance, glb_feats_df)
    if (nrow(plot_vars_df) == 0) {
      warning("No important features in glb_fin_mdl")
    } else {
      # Second feature on the x axis when there is one, else ".rownames".
      feat_x <- if (nrow(plot_vars_df) > 1) plot_vars_df$id[2] else ".rownames"
      print(myplot_prediction_regression(df = obs_df,
                                         feat_x = feat_x,
                                         feat_y = plot_vars_df$id[1],
                                         rsp_var = glb_rsp_var,
                                         rsp_var_out = glb_rsp_var_out,
                                         id_vars = glb_id_vars)
            # + facet_wrap(reformulate(plot_vars_df$id[2])) # if [1 or 2] is a factor
            # + geom_point(aes_string(color="<col_name>.fctr")) # to color the plot
      )
    }
  }

  if (glb_is_classification) {
    plot_vars_df <- imp_feats_df
    if (nrow(plot_vars_df) == 0) {
      warning("No features in selected model are statistically important")
    } else {
      # Second feature on the x axis when there is one, else ".rownames".
      feat_x <- if (nrow(plot_vars_df) > 1) plot_vars_df$id[2] else ".rownames"
      print(myplot_prediction_classification(df = obs_df,
                                             feat_x = feat_x,
                                             feat_y = plot_vars_df$id[1],
                                             rsp_var = glb_rsp_var,
                                             rsp_var_out = glb_rsp_var_out,
                                             id_vars = glb_id_vars)
            # + geom_hline(yintercept=<divider_val>, linetype = "dotted")
      )
    }
  }
}
glb_analytics_diag_plots(obs_df = glb_trnent_df)
## Warning in glb_analytics_diag_plots(obs_df = glb_trnent_df): Limiting
## important feature scatter plots to 5 out of 326
## text
## 1 Subject: naturally irresistible your corporate identity lt is really hard to recollect a company : the market is full of suqgestions and the information isoverwhelminq ; but a good catchy logo , stylish statlonery and outstanding website will make the task much easier . we do not promise that havinq ordered a iogo your company will automaticaily become a world ieader : it isguite ciear that without good products , effective business organization and practicable aim it will be hotat nowadays market ; but we do promise that your marketing efforts will become much more effective . here is the list of clear benefits : creativeness : hand - made , original logos , specially done to reflect your distinctive company image . convenience : logo and stationery are provided in all formats ; easy - to - use content management system letsyou change your website content and even its structure . promptness : you will see logo drafts within three business days . affordability : your marketing break - through shouldn ' t make gaps in your budget . 100 % satisfaction guaranteed : we provide unlimited amount of changes with no extra fees for you to be surethat you will love the result of this collaboration . have a look at our portfolio _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ not interested . . . _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
## 2728 Subject: year end 2000 performance feedback note : you will receive this message each time you are selected as a reviewer . you have been selected to participate in the year end 2000 performance management process by providing meaningful feedback on specific employee ( s ) . your feedback plays an important role in the process , and your participation is critical to the success of enron ' s performance management goals . to complete requests for feedback , access pep at http : / / pep . corp . enron . com and select perform review under performance review services . you may begin providing feedback immediately and are requested to have all feedback forms completed by friday , november 17 , 2000 . if you have any questions regarding pep or your responsibility in the process , please contact the pep help desk at : houston : 1 . 713 . 853 . 4777 , option 4 london : 44 . 207 . 783 . 4040 , option 4 email : perfmgmt @ enron . com thank you for your participation in this important process . the following is a cumulative list of employee feedback requests with a status of " open . " once you have submitted or declined an employee ' s request for feedback , their name will no longer appear on this list . 
review group : enron feedback due date : nov 17 , 2000 employee name supervisor name date selected - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - andrews , naveen c rudi c zipter oct 31 , 2000 baxter , ashley david davies nov 02 , 2000 campos , hector o peyton s gibner nov 06 , 2000 carson , richard l richard b buy oct 30 , 2000 crenshaw , shirley j wincenty j kaminski oct 26 , 2000 gandy , kristin h celeste c roberts nov 01 , 2000 gorny , vladimir theodore r murphy ii nov 02 , 2000 hewitt , kirstee l steven leppard nov 06 , 2000 hickerson , gary j jeffrey a shankman nov 15 , 2000 kindall , kevin vasant shanbhogue oct 30 , 2000 leppard , steven dale surbey nov 06 , 2000 patrick , christie a mark a palmer nov 09 , 2000 pham , bich anh t sarah brown nov 06 , 2000 raymond , maureen j wincenty j kaminski nov 02 , 2000 rosen , michael b christie a patrick nov 06 , 2000 sun , li kevin kindall nov 09 , 2000 supatgiat , chonawee peyton s gibner oct 27 , 2000 tamarchenko , tanya v vasant shanbhogue oct 26 , 2000 tawney , mark r jeffrey a shankman oct 26 , 2000 thuraisingham , ravi paul h racicot jr nov 12 , 2000 williams , matthew steven leppard nov 08 , 2000 yaman , sevil vasant shanbhogue oct 27 , 2000 yuan , ding richard l carson oct 31 , 2000
## 3209 Subject: re : thanks ! karin , i talked to mike roberts ( the head of the whole weather team ) , and he is saying that all expenses for tony should be charged to global products team . this is agreed between vince and jeff shankman . mike and vince are negotiating with john to put stephen ( or somebody who will replace him ) to some other cost centres ( via research ) . it looks like kevin moore is happy if stephen is charged to the same cost centre as tony . let us right now charge tony and stephen to the cost centre below . please , could we charge them separately - when john and vince make their decision , we should be able to re - charge . many thanks , slava enron capital & trade resources canada corp . from : karin ahamer @ enron 18 / 04 / 2001 15 : 06 to : tani nath / lon / ect @ ect , viacheslav danilov / lon / ect @ ect cc : subject : re : thanks ! tani / slava could you please let me know which costcentre i can bill for any charges relating to tony and stephen . thx karin - - - - - - - - - - - - - - - - - - - - - - forwarded by karin ahamer / eu / enron on 18 / 04 / 2001 15 : 04 - - - - - - - - - - - - - - - - - - - - - - - - - - - enron capital & trade resources corp . from : stephen bennett 18 / 04 / 2001 12 : 14 to : karin ahamer / eu / enron @ enron cc : subject : re : thanks ! - - - - - - - - - - - - - - - - - - - - - - forwarded by stephen bennett / na / enron on 04 / 18 / 2001 06 : 11 am - - - - - - - - - - - - - - - - - - - - - - - - - - - kevin g moore @ ect 04 / 18 / 2001 06 : 11 am to : stephen bennett / na / enron @ enron cc : subject : re : thanks ! r . c . 107043 co . # 0413 stephen , all charges for soft ware that you use in london should be charged to the same cost center as tony hamilton , reason being , is that someone will replace you in that position . thanks kevin moore enron north america corp . 
from : stephen bennett @ enron 04 / 18 / 2001 05 : 08 am to : karin ahamer / eu / enron @ enron cc : kevin g moore / hou / ect @ ect subject : re : thanks ! you can cost it to my group in houston . kevin moore has the proper number enron capital & trade resources canada corp . from : karin ahamer 04 / 18 / 2001 05 : 06 am to : kevin g moore / hou / ect @ ect cc : tony hamilton / eu / enron @ enron , mike a roberts / hou / ect @ ect , stephen bennett / na / enron @ enron , tani nath / lon / ect @ ect subject : re : thanks ! kevin do you know whose costcentre the microsoft frontpage is supposed to go on ? thx karin enron capital & trade resources corp . from : stephen bennett 18 / 04 / 2001 10 : 10 to : karin ahamer / eu / enron @ enron cc : kevin g moore / hou / ect @ ect , tony hamilton / eu / enron @ enron , mike a roberts / hou / ect @ ect subject : thanks ! hi karin . . . i hope you had a splendid holiday ! i wanted to thank you for getting tony and i set up here last week . we seem to have established a steady daily routine and are supporting several different trading groups in london as well as continuing our daily support of traders in houston . we ' ve gotten far more requests for information than we expected , so as a result i will be remaining in london a little longer than originally expected . as of now - i ' m not sure exactly when i ' ll be going back to houston - but vince has let me know that i will remain here as long as necessary to ensure adequate daily weather support for the london traders . to that end - i think i will need one additional piece of software installed on the machine that i will be using on a regular basis . could i please get microsoft frontpage installed as soon as we can get it ? that leads to the second issue of desks . i know that space is a premium here - and i understand that i may need to move around some as a result . 
i certainly want keep things as simple as possible for everyone - but i also wanted to make sure that you know that there are certain applications that are essential for the daily trader support in london and houston . as such , if i move , we will need to make sure that these applications are available from the beginning of the day ( we start about 0600 ) . the applications are : 1 ) adobe acrobat - full version 2 ) accuweather for windows - ( this is something i will need to install ) 3 ) microsoft front page 4 ) terminal server 5 ) the full ms office software package one idea would be to have a pc move with me - that way we would not need to reinstall this software which could cause problems with the daily support routine . thanks again for all of your help . i will let you know - once i know - how long i will be here . i should hear something from vince over the next few days giving me an idea . cheers , steve stephen bennett senior meteorologist enron research temporarily in london : ext 3 - 4761 otherwise : ( 713 ) 345 - 3661
## spam .rnorm spam.fctr T.X000 T.X2000 T.X2001 T.X713 T.X853 T.abl
## 1 1 2.45899120 Y 0 0 0 0 0 0
## 2728 0 -0.99751230 N 0 27 0 1 1 0
## 3209 0 -0.01833236 N 0 0 8 1 0 1
## T.access T.account T.addit T.address T.allow T.alreadi T.also
## 1 0 0 0 0 0 0 0
## 2728 1 0 0 0 0 0 0
## 3209 0 0 1 0 0 0 1
## T.analysi T.anoth T.applic T.appreci T.approv T.april T.area T.arrang
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 0 0 0 0 0 0
## 3209 0 0 3 0 0 0 0 0
## T.ask T.assist T.associ T.attach T.attend T.avail T.back T.base
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 0 0 0 0 0 0
## 3209 0 0 0 0 0 1 1 0
## T.begin T.believ T.best T.better T.book T.bring T.busi T.buy T.call
## 1 0 0 0 0 0 0 2 0 0
## 2728 1 0 0 0 0 0 0 1 0
## 3209 1 0 0 0 0 0 0 0 0
## T.can T.case T.chang T.check T.click T.com T.come T.comment
## 1 0 0 2 0 0 0 0 0
## 2728 0 0 0 0 0 2 0 0
## 3209 3 0 0 0 0 0 0 0
## T.communic T.compani T.complet T.confer T.confirm T.contact T.continu
## 1 0 3 0 0 0 0 0
## 2728 0 0 2 0 0 1 0
## 3209 0 0 0 0 0 0 1
## T.contract T.copi T.corp T.corpor T.cost T.cours T.creat T.credit
## 1 0 0 0 1 0 0 0 0
## 2728 0 0 1 0 0 0 0 0
## 3209 0 0 5 0 5 0 0 0
## T.crenshaw T.current T.custom T.data T.date T.day T.deal T.dear
## 1 0 0 0 0 0 1 0 0
## 2728 1 0 0 0 2 0 0 0
## 3209 0 0 0 0 0 2 0 0
## T.depart T.deriv T.design T.detail T.develop T.differ T.direct
## 1 0 0 0 0 0 0 0
## 2728 0 0 0 0 0 0 0
## 3209 0 0 0 0 0 1 0
## T.director T.discuss T.doc T.don T.done T.due T.ect T.edu T.effect
## 1 0 0 0 0 1 0 0 0 2
## 2728 0 0 0 0 0 1 0 0 0
## 3209 0 0 0 0 0 0 17 0 0
## T.effort T.either T.email T.end T.energi T.engin T.enron T.etc T.even
## 1 1 0 0 0 0 0 0 0 1
## 2728 0 0 1 2 0 0 4 0 0
## 3209 0 0 0 1 0 0 24 0 0
## T.event T.expect T.experi T.fax T.feel T.file T.final T.financ
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 0 0 0 0 0 0
## 3209 0 2 0 0 0 0 0 0
## T.financi T.find T.first T.follow T.form T.forward T.free T.friday
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 0 1 1 0 0 1
## 3209 0 0 0 0 0 2 0 0
## T.full T.futur T.gas T.get T.gibner T.give T.given T.good T.great
## 1 1 0 0 0 0 0 0 2 0
## 2728 0 0 0 0 2 0 0 0 0
## 3209 2 0 0 3 0 1 0 0 0
## T.group T.happi T.hear T.hello T.help T.high T.home T.hope T.hou
## 1 0 0 0 0 0 0 0 0 0
## 2728 1 0 0 0 1 0 0 0 0
## 3209 2 1 1 0 1 0 0 1 5
## T.hour T.houston T.howev T.http T.idea T.immedi T.import T.includ
## 1 0 0 0 0 0 0 0 0
## 2728 0 1 0 1 0 1 2 0
## 3209 0 4 0 0 2 0 0 0
## T.increas T.industri T.info T.inform T.interest T.intern T.internet
## 1 0 0 0 1 1 0 0
## 2728 0 0 0 0 0 0 0
## 3209 0 0 0 1 0 0 0
## T.interview T.invest T.invit T.involv T.issu T.john T.join T.juli
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 0 0 0 0 0 0
## 3209 0 0 0 0 1 2 0 0
## T.just T.kaminski T.keep T.kevin T.know T.last T.let T.life T.like
## 1 0 0 0 0 0 0 0 0 0
## 2728 0 2 0 2 0 0 0 0 0
## 3209 0 0 1 8 7 1 4 0 1
## T.line T.link T.list T.locat T.london T.long T.look T.lot T.made
## 1 0 0 1 0 0 0 1 0 1
## 2728 0 0 2 0 1 0 0 0 0
## 3209 0 0 0 0 6 2 1 0 0
## T.mail T.make T.manag T.mani T.mark T.market T.may T.mean T.meet
## 1 0 2 1 0 0 4 0 0 0
## 2728 0 0 2 0 2 0 1 0 0
## 3209 0 3 0 1 0 0 1 0 0
## T.member T.mention T.messag T.might T.model T.monday T.money T.month
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 1 0 0 0 0 0
## 3209 0 0 0 0 0 0 0 0
## T.morn T.move T.much T.name T.need T.net T.new T.next. T.note T.now
## 1 0 0 2 0 0 0 0 0 0 0
## 2728 0 0 0 3 0 0 0 0 1 0
## 3209 0 3 0 0 5 0 0 1 0 2
## T.number T.offer T.offic T.one T.onlin T.open T.oper T.opportun
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 0 0 0 1 0 0
## 3209 1 0 1 2 0 0 0 0
## T.option T.order T.origin T.part T.particip T.peopl T.per T.person
## 1 0 1 1 0 0 0 0 0
## 2728 2 0 0 0 3 0 0 0
## 3209 0 0 1 0 0 0 0 0
## T.phone T.place T.plan T.pleas T.point T.posit T.possibl T.power
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 0 1 0 0 0 0
## 3209 0 0 0 3 0 1 1 0
## T.present T.price T.problem T.process T.product T.program T.project
## 1 0 0 0 0 1 0 0
## 2728 0 0 0 4 0 0 0
## 3209 0 0 1 0 1 0 0
## T.provid T.public T.put T.question T.rate T.read T.real T.realli
## 1 2 0 0 0 0 0 0 1
## 2728 2 0 0 1 0 0 0 0
## 3209 0 0 1 0 0 0 0 0
## T.receiv T.recent T.regard T.relat T.remov T.repli T.report T.request
## 1 0 0 0 0 0 0 0 0
## 2728 1 0 1 0 0 0 0 4
## 3209 0 0 0 1 0 0 0 1
## T.requir T.research T.resourc T.respond T.respons T.result T.resum
## 1 0 0 0 0 0 1 0
## 2728 0 0 0 0 1 0 0
## 3209 0 2 4 0 0 2 0
## T.return T.review T.right T.risk T.robert T.run T.say T.schedul
## 1 0 0 0 0 0 0 0 0
## 2728 0 4 0 0 1 0 0 0
## 3209 0 0 1 0 3 0 1 0
## T.school T.secur T.see T.send T.sent T.servic T.set T.sever T.shall
## 1 0 0 1 0 0 0 0 0 0
## 2728 0 0 0 0 0 1 0 0 0
## 3209 0 0 0 0 0 0 1 1 0
## T.shirley T.short T.sinc T.sincer T.site T.softwar T.soon T.sorri
## 1 0 0 0 0 0 0 0 0
## 2728 1 0 0 0 0 0 0 0
## 3209 0 0 0 0 0 3 1 0
## T.special T.specif T.start T.state T.still T.stinson T.student
## 1 1 0 0 0 0 0 0
## 2728 0 1 0 0 0 0 0
## 3209 0 0 1 0 0 0 0
## T.subject T.success T.suggest T.support T.sure T.system T.take T.talk
## 1 1 0 0 0 0 1 0 0
## 2728 1 1 0 0 0 0 0 0
## 3209 7 0 0 5 3 0 0 1
## T.team T.term T.thank T.thing T.think T.thought T.thursday T.time
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 1 0 0 0 0 1
## 3209 2 0 11 1 1 0 0 0
## T.today T.togeth T.trade T.tri T.tuesday T.two T.type T.understand
## 1 0 0 0 0 0 0 0 0
## 2728 0 0 0 0 0 0 0 0
## 3209 0 0 5 0 0 0 0 1
## T.unit T.univers T.updat T.use T.valu T.version T.vinc T.visit
## 1 0 0 0 1 0 0 0 0
## 2728 0 0 0 0 0 0 0 0
## 3209 0 0 0 2 0 1 5 0
## T.vkamin T.want T.way T.web T.websit T.wednesday T.week T.well T.will
## 1 0 0 0 0 2 0 0 0 6
## 2728 0 0 0 0 0 0 0 0 2
## 3209 0 3 1 0 0 0 1 1 10
## T.wish T.within T.without T.work T.write T.www T.year T.has.http
## 1 0 1 1 0 0 0 0 0
## 2728 0 0 0 0 0 0 2 1
## 3209 0 0 0 0 0 0 0 0
## T.num.chars T.num.words T.num.words.unq T.num.chars.log
## 1 1484 120 96 7.303170
## 2728 2553 264 146 7.845416
## 3209 4904 442 195 8.498010
## T.num.words.log T.num.words.unq.log spam.fctr.predict.Final.rf.prob
## 1 4.795791 4.574711 0.996
## 2728 5.579730 4.990433 0.000
## 3209 6.093570 5.278115 0.010
## spam.fctr.predict.Final.rf spam.fctr.predict.Final.rf.accurate .label
## 1 Y TRUE .1
## 2728 N TRUE .2728
## 3209 N TRUE .3209
# Append the training-set predictions and the final model to the running list
# of replayed transitions, then replay the simulation on glb_analytics_pn.
# The list update is its own statement instead of living inside the
# `replay.trans=` argument: R evaluates arguments lazily, so an embedded
# assignment would only run if and when replay.petrisim() forced that argument.
glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
    "data.training.all.prediction", "model.final")
replay.petrisim(pn=glb_analytics_pn,
    replay.trans=glb_analytics_avl_objs, flip_coord=TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
## 3.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: data.training.all.prediction
## 4.0000 5 0 1 1 1
## 4.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: model.final
## 5.0000 4 0 0 2 1
# Log the start of the "predict.data.new" chunk in the script timeline:
# a new major step (previous maximum + 1), minor step 0, and the elapsed
# time since glb_script_tm was captured.
glb_script_df <- rbind(
  glb_script_df,
  data.frame(
    chunk_label = "predict.data.new",
    chunk_step_major = 1 + max(glb_script_df$chunk_step_major),
    chunk_step_minor = 0,
    # Single-bracket subsetting keeps the "elapsed" name on the value; that
    # name appears to be the source of the "elapsedNN" row names printed below.
    elapsed = (proc.time() - glb_script_tm)["elapsed"]
  )
)
# Show the two most recent timeline entries.
print(tail(glb_script_df, n = 2))
## chunk_label chunk_step_major chunk_step_minor elapsed
## elapsed12 fit.data.training.all 6 1 1556.761
## elapsed13 predict.data.new 7 0 1565.909
7: predict data.new
# Compute final model predictions
# Replace glb_newent_df with the result of glb_get_predictions()
# (presumably the same observations with the final model's prediction
# columns added -- confirm against the helper's definition).
glb_newent_df <- glb_get_predictions(glb_newent_df)

# Draw the diagnostic plots for the updated new-entity observations.
glb_analytics_diag_plots(obs_df = glb_newent_df)
## Warning in glb_analytics_diag_plots(obs_df = glb_newent_df): Limiting
## important feature scatter plots to 5 out of 326
## text
## 8 Subject: are you ready to get it ? hello ! viagra is the # 1 med to struggle with mens ' erectile dysfunction . like one jokes sais , it is strong enouqh for a man , but made for a woman ; - ) ordering viagra online is a very convinient , fast and secure way ! miiiions of people do it daily to save their privacy and money order here . . .
## 50 Subject: peniss growth patches are here ! . . . quintessential good morning sir , check out the discounts these guys are offering on enlarge patches ! steel package : 10 patches reg $ 79 . 95 now $ 49 . 95 ! free shipping too ! silver package : 25 patches reg $ 129 . 95 , now $ 99 . 95 ! free shipping and free exercise manual included ! gold package : 40 patches reg $ 189 . 95 , now $ 149 . 95 ! free shipping and free exercise manual included ! platinum package : 65 patches reg $ 259 . 95 , now $ 199 . 95 ! free shipping and free exercise manual included ! millions of men are taking advantage of this revolutionary new product - don ' t be left behind ! " my wife has become so much more interested in sex and now often initiates . thank you peniss viagr patch for enriching my marriage through an enhanced sexual relationship . " - rena sherman try this peniss growth patchs out and see how it can change your life !
## 80 Subject: free 1 week dvd downloads we are happy to offer you . . . . all the dvd ' s you could ever watch for free . . . what ever your pleasure we have it all . . . take as many as you want and it costs you nothing . . . . check out the 1000 ' s of titles . . . . dont know where to get your adult dvds ? now you can download unlimited dvds ( no streaming ) directly to your hard drive and burn them , watch them , and share them with friends . make movies for the road , your home or even for parties . cognizable everyman cranky legitimacy wedge keenan keenan description day keenan cognizable bellini patient notate pow youth thermionic zig autocratic crewmen pickering streetcar componentry anselm cadaver sciatica dunham hindmost thanks but its not for me : - )
## 188 Subject: don ' t lose your data ! prevent future computer problems at a fraction of the cost of repairs web and order form : pro - techonline . com e - mail - info @ pro - techonline . com fax 206 - 937 - 4315 call today at 1 - 800 - 726 - 5997 understand the operating environments your systems are in , computers by their very nature , pull air in to keep their parts cool . their fans work 24 / 7 keeping cool . the problem with this is over a period of time dust , dirt and smoke , will build up and damage your computer . q damage your computer chips q destroy the data on your hard drive q melt the mother board what does this mean to you ? all of your information could be lost forever ! all of your files and data - works in progress - and contacts could be destroyed ! the information you have saved is a thousands times more valuable than the computer itself . thats why technicians recommend backups for your data . its all p r e v e n t a b l e ! the pro - tech computer filtration system simply works by filtering the air before air reaches your computers sensitive internal components . the system is set up at the base of your computer , with 2 easy steps , and is positioned in front of your computers air intake , filtering 98 % of the air particles that would otherwise get into your systems . for a little as $ 24 . 95 plus shipping , you can make sure you are completely protected and never worry about dust , smoke , moisture , etc . damaging your computer again ! web and order form : pro - techonline . com e - mail - info @ pro - techonline . com fax 206 - 937 - 4315 product availability : please allow up to 10 days for delivery . pro - tech will do everything we can to ship your order as soon as possible and notify you of the estimated time of delivery . for great rates on quantity , and wholesale pricing : order 5 or more and receive a 10 % discount ! wholesale orders save up to 20 % or more ! pro - tech uses ups ground for shipping and handling $ 5 . 
85 pro - tech prides it self on protecting consumers financial privacy and safety . no more cans of air , no more bags , no more computer vacs . perfect for home , office , and industry model # 1 $ 24 . 95 model # 2 $ 29 . 95 model # 3 $ 36 . 95 our systems are fully adjustable and will fit any computer , one size fits all . installs in seconds ! step # 1 : place the base of the filtration system onto your computer . step # 2 : place the top of the filtration system onto the base . youre all set ; and you can access your disk drives as needed . replacement filters 1 size filter fits all three systems a package of 4 large heavy duty hepa filters , micron rated and designed , to last up to 6 months , $ 9 . 85 perfect for industry . alternative replacement filters a large heavy duty diffusion dual stage filtering material . designed to last up to 6 months . package of 4 $ 7 . 85 perfect for home and office . call today at 1 - 800 - 726 - 5997 web and order form : pro - techonline . com e - mail - info @ pro - techonline . com fax 206 - 937 - 4315 pro - tech filtration systems 3701 sw southern seattle , wa 98216 this e - mail message is an advertisement and / or solicitation .
## 211 Subject: partnership for raising awareness hello , my name is shane lamotte and i ' m in the new rock band living illusion . how are you ? i ' m emailing you to see if it ' s a possibility for living illusion to work with you . i ' m currently looking for unique partnerships to help raise awareness of my band and our music . if you want to check out my band and listen to some tunes go to : http : / / www . livingillusion . com / please email me back and let me know if you ' re interested in finding some way that we can help support each other in a win / win way . thanks , shane lamotte www . livingillusion . com ps also if your interested in exchanging links between my website and yours just let me know and we ' ll make it happen : )
## 257 Subject: otc gdvi - the momentum continues - gdvi website debute otc bbalert gdvi news update : global diversified industries debutes brand new website ! www . gdvi . net otc bb alert spectacular operating results momentum continues sales projections for next 12 months surpasses $ 20 million 99 . 8 % revenue increase 278 % net income increase 154 % stockholders equity increase 79 % increase in assets $ 8 million order backlog $ 50 million manufacturing capacity overview global diversified industries operates in the modular building construction industry , and strategically targets the california education sector . gdvi is strategically located in central california on 16 acres with a 100 , 000 square foot state - of - the - art manufacturing facility . throughout 2003 and 2004 the company focused on building its infrastructure through acquisitions , development of a state - of - the - art manufacturing facility , and by securing the requisite financing facilities to fuel business growth . through the combination of its new facility ( $ 50 million capacity ) , the increased demand for portable buildings and the state of california ' s bond approvals , gdvi has become well positioned to become one of the dominant leaders in modular manufacturing on the west coast . gdvi should benefit greatly from the $ 12 . 1 billion school improvement bond that is expected to be passed by california voters next month ( march ) . this presents , in our opinion an opportunity for early investors of gdvi to also benefit before the mainstream investor realizes who the benefactors are and subsequently invests in those companies . global diversified has taken numerous strategic development steps throughout 2003 and 2004 , including generating strong revenues as well as profits and is now poised for explosive growth in 2005 . the company is led by a strong management team with previous success in building companies into $ 50 million per annum businesses . 
in the past year gdvi has exceeded its own sales revenue projections , renewed its piggyback contract , received state approvals on engineered product designs , started a new credit facility and opened its new 100 , 000 square foot manufacturing facility . the company will continue to seek new acquisition candidates through its aggressive growth plan . profile gdvi is a holding company that currently operates two wholly owned subsidiaries , mbs construction inc . , a modular contractor specializing in modular construction site work and renovation and global modular , inc . , a sales , marketing and manufacturing of modular type structures . its principal customer base is currently educational ( public and private schools , universities , etc . ) , child - care and municipality sectors . its product lines consist of a variety of portable classroom designs , including both single - story and two - story floor plans . global modular ' s portable classroom structures are engineered and constructed in accordance with pre - approved building plans , commonly referred to as p . c . ' s or pre - checked plans , that conform to structural and seismic safety specifications administered by the california department of state architects ( dsa ) . global modular also enjoys the benefit of providing educational customers with products contracted under a piggyback clause . the state of california allows school districts to canvass proposals from modular classroom vendors under a bidding process where the successful bidder can provide other public school districts and municipalities portable classrooms under a piggyback contract issued by the originating school district . this process saves school districts valuable time and resources from the necessity of soliciting bids . 
a modular vendor who possesses a piggyback contract containing competitive pricing and a variety of design options may have access to future business for up to five years , depending on the term of the piggyback contract . the strategic focus on california schools since 1998 , california legislation has required that at least 20 % of all new classrooms constructed with state funds be portable structures . there are five compelling reasons for this trend : modular classrooms are faster to construct ( as quickly as 2 weeks ) they cost significantly less ( as low as $ 30 , 000 vs . $ 100 , 000 ) they offer greater flexibility for use compared to conventional buildings they are easier to finance they provide financing incentives to cope with population growth , the state department of education estimates that california will need more than 2 , 500 classrooms each year for the next four years , which equates to more than 10 , 000 classrooms . due to the current and projected budget cuts throughout the california education sector , public and private schools are expected to turn to portable / modular construction to fulfill their additional classroom requirements over the next four years . the california schools budget crisis on november 5 , 2002 a $ 13 . 2 billion school facilities improvements bond proposal ( proposition 47 ) was passed by california voters . this bond measure passage does not include an approximate $ 9 . 4 billion worth of local bond measures passed by various school districts throughout the state . a second bond measure worth $ 12 . 1 billion went before the voters on the march 2004 . these bond measures are about three times higher than the record $ 9 . 2 billion bond california voters approved in 1998 . the revenue generated from these bond measures will be used for school modernization programs , which include requirements for relocatable classrooms and modular classroom construction and renovations . 
money from the bonds will help overcrowded public and private schools ; design upgrades and expand building space at community colleges and other institutions of higher learning throughout california . gdvi business infrastructure among global modular ' s asset base is its integrated , state - of - the - art , automated manufacturing process which includes equipment , raw material and marketing collateral that are specifically designed for the high capacity fabrication of modular structures . gdvi employs a workforce of 60 employees and is looking to add to its workforce as demand increases . operates out of a sixteen acre site with a 100 , 000 square foot operating structure . wholly owned subsidiary ( global modular inc . ) markets , designs and manufactures the buildings wholly owned subsidiary ( mbs corporation ) handles installation and building renovation . symbol otc . bb gdvi recent price 13 . 5 cents management team philip hamilton , ceo and president mr . hamilton has an extensive and very successful background in modular manufacturing . from 1996 to feb 2000 he served as chairman and ceo of pacesetter industries inc . he built this company from inception into one of californias largest manufacturers , producing and installing thousands of schools and commercial buildings . under his leadership , pacesetter industries moved into a 5 , 500 , 000 sq . ft . facility in atwater , california with branch sales offices throughout the state . the company employed a staff of over 650 employees and had annual sales of $ 50 , 000 , 000 . adam de bard , vice president mr . de bard has over 6 years of experience in the manufacturing and business sectors . from 1997 to 2000 he served as vice president and chief information officer of pacesetter industries . ronald kilpatrick , director of finance mr . kilpatrick has 36 years experience in both domestic and international development and management of major corporations . 
he is a managing partner of pacific rim capital llc which provides venture capital to projects in the pacific rim . recent headlines global diversified industries , inc . commencing efforts to increase its u . s . based investor and public relations visibility pr newswire ( tue 5 : 00 am ) global diversified industries , inc . modular division secures new order worth more than $ 3 million for immediate delivery pr newswire ( thu , jun 9 ) wallst . net airing all - new , exclusive audio interviews with gdvi and geoi pr newswire ( tue , jun 7 ) wallst . net airing exclusive audio interviews with gdvi and xle pr newswire ( thu , jun 2 ) global diversified industries , inc . acquires valuable assets from california modular company pr newswire ( thu , may 26 ) global diversified industries , inc . modular division implementing its fourth production line pr newswire ( thu , may 19 ) talkingstocks . com announces interview with philip hamilton , president and ceo of global diversified industries , inc . primezone media network ( tue , may 17 ) stockguru . com initiates profile coverage of global diversified industries , inc . primezone media network ( mon , may 16 ) global diversified industries ' modular division billings total $ 1 . 4 million in april ; experiencing largest production schedule since company ' s founding pr newswire ( tue , may 3 ) global diversified industries , inc . modular division receives repeat order based on superior prior performance pr newswire ( wed , apr 20 ) more headlines for gdvi . ob . . . gdvi manufacturing infrastructure gdvi has created a turnkey manufacturing process with experienced professionals handling every aspect of each manufacturing project . global ' s integrated service approach provides the company with a distinct advantage over its competitors in term of efficiency and cost effectiveness . 
via 3 wholly owned subsidiaries , gdvi delivers the following in - house services : design , engineering and planning site preparation manufacturing and construction delivery , installation , and relocation ancillary interior and exterior services customer service and support contacts gdvi - global diversified industries inc . 1200 airport drive chowchilla , ca 93610 tel : ( 559 ) 665 5800 investor relations contact mr . paul knopick tel : ( 949 ) 707 - 5365 pknopick @ . com stock quotes http : / / finance . yahoo . com / q ? s = gdvi . ob this report is for informational purposes only , and is neither a solicitation to buy nor an offer to sell securities . investment in low - priced small and micro - cap stocks are considered extremely speculative and may result in the loss of some or all of any investment made in these companies . expedite is not a registered investment advisor or a broker - dealer . information , opinions and analysis contained herein are based on sources believed to be reliable , but no representation , expressed or implied , is made as to its accuracy , completeness or correctness . the opinions contained herein reflect our current judgment and are subject to change without notice . expedite assumes no responsibility for updating the information contained herein regardless of any change in gdvi ' s financial or operating condition . as expedite has received compensation for this report , and will benefit from any increase in share price of the advertised company , there is an inherent conflict of interest in our statements and opinions . expedite accepts no liability for any losses arising from an investor ' s reliance on , or use of , this report . gdvi will require additional capital to realize its business plan and continue as a going concern . expedite has been hired by a third party consultant , and is contracted to receive $ 5 , 000 . 
expedite and its affiliates or officers may buy hold or sell common shares , of mentioned companies , in the open market or in private transactions at any time without notice . certain information included herein is forward - looking within the context of the private securities litigation reform act of 1995 , including , but not limited to , statements concerning manufacturing , marketing , growth , and expansion . the words " may , " " would , " " will , " " expect , " " estimate , " " anticipate , " " believe , " " intend , " and similar expressions and variations thereof are intended to identify forward - looking statements . such forward - looking information involves important risks and uncertainties that could affect actual results and cause them to differ materially from expectations expressed herein . global diversified industries , inc . 1200 airport dr . chowchilla , ca 93610 this e - mail message is an advertisement and / or solicitation .
## 297 Subject: urgent reply needed from : mr . usman bello . attention sir in appreciation of your esteemed contact received through a reliable source , first of all , i wish to introduce myself to you , i am mr usman bello . the only surviving son of the late dr . mustapha bello who was one of the aid to the former leader of my country iraq before he was killed in a war in my country . i know that this mail will come to you as a surprise but honestly i do not intend to surprise you . i write this letter in respect of my intention to invest the sum of us $ 10 , 000 , 000 . 00 in your company which i inherited from my father proceeds before his death . my mother is from haiti while my father is from iraq before they got married as husband and wife . i am now left with my only surviving mother who unfortunately has been critically ill since late last year because of the shock the death of my late father caused her . when my father with the rest members of my family was killed on 16 th january 2003 during the war , i and my mother escaped to iran with the help of united nations officials from there we came to thailand through the united nation peace keeping pilot . the fund is now with the financial firm . in view of this plight , i expect you to be trust worthy and kind enough to assist me , i hereby agree to compensate your sincere and candid effort in this regard with 20 % of the total fund and 10 % for expenses , which may arise during the transaction . whatever your decision is , please contact me immediately through the above email . i also appeal to you to keep this matter secret for the interest of my family . best regards . usman bello .
## 336 Subject: major medical breakthrough huge profit potential major medical breakthroughhuge profit potential imagine yourself as part owner of the most interesting , full service state - of - the - art medical facility , equipped with the most sophisticated and effective scanning diagnostic tools available today . electron beam tomography is a cutting - edge diagnostic technology capable of providing a crystal - ball - like look into your medical future . this technology has been featured on oprah , larry king , good morning america , and usa today . ebt scans are now covered by most health insurance companies and hmos , causing an explosion in usership and exceptionally high demand for this procedure . ebt can identify heart disease years before a treadmill test would show an abnormality and many years before a heart attack might occur . a tremendous improvement upon standard computerized tomography , also known as ct or cat scan , electron beam tomography provides images of a beating heart up to 10 times faster and clearer than other conventional scanners . the dramatic capabilities of this spectacular technology should provide an extraordinary investment opportunity for those establishing state - of - the - art outpatient clinics , in order to provide the ebt body scan procedures to health conscious americans . projected 10 - year return of 916 % . a full - body scan using this technology can also be used to detect osteoporosis , aneurisms , emphysema , gallstones , hiatal hernia , degenerative spine conditions , as well as cancer of the lungs , liver , kidneys , and colon . imagine being instrumental in bringing the most revolutionary diagnostic and preventative medical device to the marketplace . $ 15 k minimum investment required . serious inquiries only . to recieve your free video . fill out this form . 
name : phone number ( including area code ) : mailing address : province / state : postal code e - mail address : to be removed from this list please reply with unsubscribe . thank you . http : / / xent . com / mailman / listinfo / fork
## 531 Subject: re : site license for power world i concur .
## 714 Subject: rodrigo lamas - best wishes i would like to take the opportunity to let you know i have resigned today . i wish you all the best in your carreer and in life . regards rodrigo lamas
## 716 Subject: drogi vincenty . skoro ty byles na tyle mily ze zadzoniles osmielam sie i ja zaimejlowac do ciebie . w dniach 29 . 04 do 7 . 05 bede w usa . cel turystyczny do moich znajomych z roku . w programie mam wiele . glowna baza idahi springs . lece z mija corka i kolegami z roku . nie znam twojego miejsca zamieszkania , ale moze . tefon moich znajomych chyba 303 567 0190 , adres e - mailowy pzdrawiam zofia grodek . oni maja na nazwisko golebiowscy . get free email and a permanent address at http : / / www . netaddress . com / ? n = 1
## 1300 Subject: followup from iris mack hi , thank you for your email . i am indeed interested in exploring opportunities in your group - research ( quantitative modeling ) . just a note to let you know that i am not in the pure quantitative research group here at bnp paribas - but in the derivatives structured products . i do some of my own modeling and basic excel programming . i used to do a lot of fortran programming . however , i am not a very good c / c + + programmer . hope this gives you further details about my skills and interests . thank you , iris in iris , at this point it ' s my group : research , i . e . quantitative modeling . please , let me know what your interests are and i shall try to line up other groups for the interview . vince iris . mack @ bnpparibas . com on 06 / 09 / 2000 02 : 33 : 50 am to : vince . j . kaminski @ enron . com cc : subject : re [ 10 ] : greetings from london ( to enron ) hi , i will be out of the country until wednesday afternoon - london time . maybe we can chat then . also , could you please tell me about the group ( s ) that are interested in speaking with me . thanks , iris internet from : vince . j . kaminski @ enron . com on 06 / 06 / 2000 20 : 31 gmt to : iris mack cc : vince . j . kaminski bcc : subject : re : re [ 8 ] : greetings from london ( to enron ) iris , leaving for ca in a few minutes . i shall get back to you monday . vince iris . mack @ bnpparibas . com on 06 / 06 / 2000 10 : 36 : 46 am to : vince . j . kaminski @ enron . com cc : subject : re [ 8 ] : greetings from london ( to enron ) hi , thanks for your email . begining of july - what about july 4 th week ? could you give me a bit more info regarding the best days for you and your colleagues . thanks , iris internet from : vince . j . kaminski @ enron . com on 06 / 06 / 2000 14 : 29 gmt to : iris mack cc : vince . j . kaminski bcc : subject : re : re [ 6 ] : greetings from london ( to enron ) iris , the beginning of july would be better for us . 
please , let me know what is your availability . vince iris . mack @ bnpparibas . com on 06 / 06 / 2000 02 : 30 : 49 am to : vince . j . kaminski @ enron . com cc : subject : re [ 6 ] : greetings from london ( to enron ) hi , thank you for your email . how many days do we need ? i have checked my calendar . and think that i should be able to come on monday june 19 th ( tuesday june 20 th - if you need more than one day ) . . i can fly from london to houston during the following weekend to arrive in time for monday morning . let me know if these days are good for you and your colleagues . regards , iris internet from : vince . j . kaminski @ enron . com on 25 / 05 / 2000 18 : 33 gmt to : iris mack cc : vince . j . kaminski bcc : subject : re : re [ 4 ] : greetings from london ( to enron ) iris , we can invite you for an interview to houston . what would be a the time for you ? vince iris . mack @ bnpparibas . com on 05 / 25 / 2000 11 : 32 : 04 am to : vince . j . kaminski @ enron . com cc : subject : re [ 4 ] : greetings from london ( to enron ) hi , thank you for your prompt response . i am interested in any contacts you may have in your rolodex . also , i would be opened to talk to enron as well . please let me know more details . kind regards , iris internet from : vince . j . kaminski @ enron . com on 25 / 05 / 2000 16 : 19 gmt to : iris mack cc : vince . j . kaminski , stinson . gibner , grant . masson , pinnamaneni . krishnarao , vasant . shanbhogue bcc : subject : re : re [ 2 ] : greetings from london ( to enron ) iris , i shall go through my rolodex and try to find some good leads for you . i left investment banking 8 years ago and this field changes very fast . alternatively , would you be interested in a company like enron or another energy company in houston ? please , let me know . vince iris . mack @ bnpparibas . com on 05 / 25 / 2000 09 : 20 : 01 am to : vince . j . kaminski @ enron . 
com cc : subject : re [ 2 ] : greetings from london ( to enron ) hi , how are you ? thank you kindly for your email . sorry i have not responded sooner . currently i am working in derivatives structured products and risk management at bnp paribas in london . although i currently enjoy living and working in london , i may need to return to the states - because of my mother ' s failing health . do you know of any good contacts at investment banks that i may forward my details to ? for your information , i have attached my cv . ( please see attached file : iris marie mack . doc ) . thank you in advance for your time and consideration . kind regards , iris mack 44 ( 0 ) 20 7595 8665 ( work ) 44 ( 0 ) 20 7229 9986 ( home ) ( see attached file : iris marie mack . doc ) internet from : vince . j . kaminski @ enron . com on 04 / 04 / 2000 15 : 03 gmt to : iris mack cc : vince . j . kaminski bcc : subject : re : greetings from london ( to enron ) iris , please , feel free to give me a call when you have a few minutes . i shall be glad to chat with you . vince iris . mack @ paribas . com on 03 / 30 / 2000 02 : 24 : 27 am to : vkamins @ enron . com cc : denis . autier @ paribas . com subject : greetings from london ( to enron ) dear dr . kaminski , how are you ? it was nice to meet you at the real options conference in nyc . i was intrigued by some of the comments in your conference talk . in particular , by your use of real options to hedge financial options . this is something i am interested in as well . when you have some time , could we chat about this topic in a bit more detail ? thanks for your time and consideration . hope to hear from you soon . regards , iris mack - - - - - - - - - - - - - - - - this message is confidential ; its contents do not constitute a commitment by bnp paribas group * except where provided for in a written agreement between you and bnp paribas group * . 
any unauthorised disclosure , use or dissemination , either whole or partial , is prohibited . if you are not the intended recipient of the message , please notify the sender immediately . * bnp paribas group is a trading name of bnp sa and paribas sa ce message est confidentiel ; son contenu ne represente en aucun cas un engagement de la part du groupe bnp paribas * sous reserve de tout accord conclu par ecrit entre vous et le groupe bnp paribas * . toute publication , utilisation ou diffusion , meme partielle , doit etre autorisee prealablement . si vous n ' etes pas destinataire de ce message , merci d ' en avertir immediatement l ' expediteur . * le groupe bnp paribas est le nom commercial utilise par bnp sa et paribas sa ( see attached file : iris marie mack . doc ) ( see attached file : iris marie mack . doc ) ( see attached file : iris marie mack . doc ) ( see attached file : iris marie mack . doc ) ( see attached file : iris marie mack . doc ) - iris marie mack . doc
## 2623 Subject: year end 2000 performance feedback note : you will receive this message each time you are selected as a reviewer . you have been selected to participate in the year end 2000 performance management process by providing meaningful feedback on specific employee ( s ) . your feedback plays an important role in the process , and your participation is critical to the success of enron ' s performance management goals . to complete requests for feedback , access pep at http : / / pep . corp . enron . com and select perform review under performance review services . you may begin providing feedback immediately and are requested to have all feedback forms completed by friday , november 17 , 2000 . if you have any questions regarding pep or your responsibility in the process , please contact the pep help desk at : houston : 1 . 713 . 853 . 4777 , option 4 london : 44 . 207 . 783 . 4040 , option 4 email : perfmgmt @ enron . com thank you for your participation in this important process . the following is a cumulative list of employee feedback requests with a status of " open . " once you have submitted or declined an employee ' s request for feedback , their name will no longer appear on this list . 
review group : enron feedback due date : nov 17 , 2000 employee name supervisor name date selected - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - andrews , naveen c rudi c zipter oct 31 , 2000 baxter , ashley david davies nov 02 , 2000 campos , hector o peyton s gibner nov 06 , 2000 carson , richard l richard b buy oct 30 , 2000 crenshaw , shirley j wincenty j kaminski oct 26 , 2000 gandy , kristin h celeste c roberts nov 01 , 2000 gorny , vladimir theodore r murphy ii nov 02 , 2000 hewitt , kirstee l steven leppard nov 06 , 2000 kindall , kevin vasant shanbhogue oct 30 , 2000 lamas vieira pinto , rodrigo david port oct 31 , 2000 pham , bich anh t sarah brown nov 06 , 2000 raymond , maureen j wincenty j kaminski nov 02 , 2000 rosen , michael b christie a patrick nov 06 , 2000 sun , li kevin kindall nov 09 , 2000 supatgiat , chonawee peyton s gibner oct 27 , 2000 tamarchenko , tanya v vasant shanbhogue oct 26 , 2000 tawney , mark r jeffrey a shankman oct 26 , 2000 williams , matthew steven leppard nov 08 , 2000 yaman , sevil vasant shanbhogue oct 27 , 2000 yuan , ding richard l carson oct 31 , 2000
## spam .rnorm spam.fctr T.X000 T.X2000 T.X2001 T.X713 T.X853 T.abl
## 8 1 0.4869978 Y 0 0 0 0 0 0
## 50 1 0.8199824 Y 0 0 0 0 0 0
## 80 1 -0.3886005 Y 0 0 0 0 0 0
## 188 1 1.0866420 Y 0 0 0 0 0 0
## 211 1 -0.8747393 Y 0 0 0 0 0 0
## 257 1 -0.5233531 Y 10 2 0 0 0 0
## 297 1 -0.3092672 Y 2 0 0 0 0 0
## 336 1 0.8342941 Y 0 0 0 0 0 0
## 531 0 -1.7994395 N 0 0 0 0 0 0
## 714 0 -2.1376415 N 0 0 0 0 0 0
## 716 0 -0.1092869 N 0 0 0 0 0 0
## 1300 0 -0.8023712 N 0 11 0 0 0 1
## 2623 0 -0.4618030 N 0 24 0 1 1 0
## T.access T.account T.addit T.address T.allow T.alreadi T.also
## 8 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0
## 188 1 0 0 0 1 0 0
## 211 0 0 0 0 0 0 1
## 257 1 0 2 0 1 0 2
## 297 0 0 0 0 0 0 1
## 336 0 0 0 2 0 0 2
## 531 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0
## 716 0 0 0 1 0 0 0
## 1300 0 0 0 0 0 0 2
## 2623 1 0 0 0 0 0 0
## T.analysi T.anoth T.applic T.appreci T.approv T.april T.area T.arrang
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 0 0
## 211 0 0 0 0 0 0 0 0
## 257 1 0 0 0 4 1 0 0
## 297 0 0 0 1 0 0 0 0
## 336 0 0 0 0 0 0 1 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 1 0 0 0 0 0 0
## 2623 0 0 0 0 0 0 0 0
## T.ask T.assist T.associ T.attach T.attend T.avail T.back T.base
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 1 0 3
## 211 0 0 0 0 0 0 1 0
## 257 0 0 0 0 0 0 0 5
## 297 0 1 0 0 0 0 0 0
## 336 0 0 0 0 0 1 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 0 8 0 1 1 0
## 2623 0 0 0 0 0 0 0 0
## T.begin T.believ T.best T.better T.book T.bring T.busi T.buy T.call
## 8 0 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 0 0 2
## 211 0 0 0 0 0 0 0 0 0
## 257 0 2 0 0 0 0 6 2 0
## 297 0 0 1 0 0 0 0 0 0
## 336 0 0 0 0 0 1 0 0 0
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 2 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0 0
## 1300 2 0 1 1 0 0 0 0 1
## 2623 1 0 0 0 0 0 0 1 0
## T.can T.case T.chang T.check T.click T.com T.come T.comment
## 8 0 0 0 0 0 0 0 0
## 50 1 0 1 1 0 0 0 0
## 80 1 0 0 1 0 0 0 0
## 188 4 0 0 0 0 6 0 0
## 211 1 0 0 1 0 2 0 0
## 257 1 0 2 1 0 4 0 0
## 297 0 0 0 0 0 0 1 0
## 336 2 0 0 0 0 1 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 1 0 0
## 1300 3 0 1 1 0 18 1 1
## 2623 0 0 0 0 0 2 0 0
## T.communic T.compani T.complet T.confer T.confirm T.contact T.continu
## 8 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0
## 188 0 0 1 0 0 1 0
## 211 0 0 0 0 0 0 0
## 257 0 14 1 0 0 2 4
## 297 0 1 0 0 0 2 0
## 336 0 1 0 0 0 0 0
## 531 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0
## 1300 0 2 0 2 0 2 0
## 2623 0 0 2 0 0 1 0
## T.contract T.copi T.corp T.corpor T.cost T.cours T.creat T.credit
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 1 0 0 0
## 188 0 0 0 0 1 0 0 0
## 211 0 0 0 0 0 0 0 0
## 257 6 0 0 2 2 0 1 1
## 297 0 0 0 0 0 0 0 0
## 336 0 0 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 0 0 0 0 0 0
## 2623 0 0 1 0 0 0 0 0
## T.crenshaw T.current T.custom T.data T.date T.day T.deal T.dear
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 1 0 0
## 188 0 0 0 4 0 1 0 0
## 211 0 1 0 0 0 0 0 0
## 257 0 4 3 0 0 0 0 0
## 297 0 0 0 0 0 0 0 0
## 336 0 0 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 2 0 0 0 4 0 1
## 2623 1 0 0 0 2 0 0 0
## T.depart T.deriv T.design T.detail T.develop T.differ T.direct
## 8 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 1
## 188 0 0 2 0 0 0 0
## 211 0 0 0 0 0 0 0
## 257 2 0 7 0 3 1 0
## 297 0 0 0 0 0 0 0
## 336 0 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0
## 1300 0 2 0 4 0 0 0
## 2623 0 0 0 0 0 0 0
## T.director T.discuss T.doc T.don T.done T.due T.ect T.edu T.effect
## 8 0 0 0 0 0 0 0 0 0
## 50 0 0 0 1 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0 0
## 188 0 0 0 1 0 0 0 0 0
## 211 0 0 0 0 0 0 0 0 0
## 257 1 0 0 0 0 1 0 0 1
## 297 0 0 0 0 0 0 0 0 0
## 336 0 0 0 0 0 0 0 0 1
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0 0
## 1300 0 0 8 0 0 0 0 0 0
## 2623 0 0 0 0 0 1 0 0 0
## T.effort T.either T.email T.end T.energi T.engin T.enron T.etc T.even
## 8 0 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0 1
## 188 0 0 0 0 0 0 0 1 0
## 211 0 0 2 0 0 0 0 0 0
## 257 1 0 0 0 0 3 0 1 0
## 297 1 0 1 0 0 0 0 0 0
## 336 0 0 0 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0
## 716 0 0 1 0 0 0 0 0 0
## 1300 0 1 4 0 1 0 24 0 0
## 2623 0 0 1 2 0 0 4 0 0
## T.event T.expect T.experi T.fax T.feel T.file T.final T.financ
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 3 0 1 0 0
## 211 0 0 0 0 0 0 0 0
## 257 0 4 2 0 0 0 0 5
## 297 0 1 0 0 0 0 0 0
## 336 0 0 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 0 0 1 7 0 0
## 2623 0 0 0 0 0 0 0 0
## T.financi T.find T.first T.follow T.form T.forward T.free T.friday
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 7 0
## 80 0 0 0 0 0 0 2 0
## 188 1 0 0 0 3 0 0 0
## 211 0 1 0 0 0 0 0 0
## 257 1 0 0 1 0 3 0 0
## 297 1 0 1 0 0 0 0 0
## 336 0 0 0 0 1 0 1 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 1 0
## 1300 1 1 0 1 0 1 1 0
## 2623 0 0 0 1 1 0 0 1
## T.full T.futur T.gas T.get T.gibner T.give T.given T.good T.great
## 8 0 0 0 1 0 0 0 0 0
## 50 0 0 0 0 0 0 0 1 0
## 80 0 0 0 1 0 0 0 0 0
## 188 0 1 0 1 0 0 0 0 1
## 211 0 0 0 0 0 0 0 0 0
## 257 0 1 0 0 0 0 0 0 1
## 297 0 0 0 0 0 0 0 0 0
## 336 2 1 0 0 0 0 0 1 0
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0
## 716 0 0 0 1 0 0 0 0 0
## 1300 0 0 0 1 1 3 0 4 0
## 2623 0 0 0 0 2 0 0 0 0
## T.group T.happi T.hear T.hello T.help T.high T.home T.hope T.hou
## 8 0 0 0 1 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0 0
## 80 0 1 0 0 0 0 1 0 0
## 188 0 0 0 0 0 0 2 0 0
## 211 0 0 0 1 2 0 0 0 0
## 257 0 0 0 0 1 1 0 0 0
## 297 0 0 0 0 1 0 0 0 0
## 336 0 0 0 0 0 1 0 0 0
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0 0
## 1300 11 0 1 0 0 0 1 2 0
## 2623 1 0 0 0 1 0 0 0 0
## T.hour T.houston T.howev T.http T.idea T.immedi T.import T.includ
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 3
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 0 0
## 211 0 0 0 1 0 0 0 0
## 257 0 0 0 1 0 1 1 7
## 297 0 0 0 0 0 1 0 0
## 336 0 0 0 1 0 0 0 1
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 1 0 0 0 0
## 1300 0 3 1 0 0 1 0 0
## 2623 0 1 0 1 0 1 2 0
## T.increas T.industri T.info T.inform T.interest T.intern T.internet
## 8 0 0 0 0 0 0 0
## 50 0 0 0 0 1 0 0
## 80 0 0 0 0 0 0 0
## 188 0 2 3 2 0 1 0
## 211 0 0 0 0 2 0 0
## 257 8 16 0 6 1 1 0
## 297 0 0 0 0 1 0 0
## 336 0 0 0 0 1 0 0
## 531 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0
## 1300 0 0 1 1 7 0 5
## 2623 0 0 0 0 0 0 0
## T.interview T.invest T.invit T.involv T.issu T.john T.join T.juli
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 0 0
## 211 0 0 0 0 0 0 0 0
## 257 3 4 0 1 1 0 0 0
## 297 0 1 0 0 0 0 0 0
## 336 0 2 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 2 2 1 0 0 0 0 3
## 2623 0 0 0 0 0 0 0 0
## T.just T.kaminski T.keep T.kevin T.know T.last T.let T.life T.like
## 8 0 0 0 0 0 0 0 0 1
## 50 0 0 0 0 0 0 0 1 0
## 80 0 0 0 0 1 0 0 0 0
## 188 0 0 2 0 0 2 0 0 0
## 211 1 0 0 0 2 0 2 0 0
## 257 0 0 0 0 0 0 0 0 0
## 297 0 0 2 0 1 1 0 0 0
## 336 0 0 0 0 0 0 0 0 1
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 1 0 1 1 1
## 716 0 0 0 0 0 0 0 0 0
## 1300 1 16 0 0 7 0 6 0 1
## 2623 0 2 0 2 0 0 0 0 0
## T.line T.link T.list T.locat T.london T.long T.look T.lot T.made
## 8 0 0 0 0 0 0 0 0 1
## 50 0 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 0 0 0
## 211 0 1 0 0 0 0 1 0 0
## 257 2 0 0 1 0 0 4 0 2
## 297 0 0 0 0 0 0 0 0 0
## 336 0 0 1 0 0 0 1 0 0
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0 0
## 1300 1 0 0 0 15 0 0 1 0
## 2623 0 0 2 0 1 0 0 0 0
## T.mail T.make T.manag T.mani T.mark T.market T.may T.mean T.meet
## 8 0 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0 0
## 80 0 1 0 1 0 0 0 0 0
## 188 4 1 0 0 0 0 0 1 0
## 211 0 1 0 0 0 0 0 0 0
## 257 1 0 4 0 0 5 9 0 0
## 297 1 0 0 0 0 0 1 0 0
## 336 2 0 0 1 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0 0
## 1300 0 0 1 1 0 0 3 0 1
## 2623 0 0 2 0 1 0 1 0 0
## T.member T.mention T.messag T.might T.model T.monday T.money T.month
## 8 0 0 0 0 0 0 1 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 1 0 3 0 0 2
## 211 0 0 0 0 0 0 0 0
## 257 0 1 1 0 0 0 1 2
## 297 1 0 0 0 0 0 0 0
## 336 0 0 0 1 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 4 0 3 3 0 0
## 2623 0 0 1 0 0 0 0 0
## T.morn T.move T.much T.name T.need T.net T.new T.next. T.note T.now
## 8 0 0 0 0 0 0 0 0 0 0
## 50 1 0 1 0 0 0 1 0 0 5
## 80 0 0 0 0 0 0 0 0 0 1
## 188 0 0 0 0 1 0 0 0 0 0
## 211 0 0 0 1 0 0 1 0 0 0
## 257 0 1 0 0 1 4 8 4 0 1
## 297 0 0 0 0 1 0 0 0 0 2
## 336 1 0 0 1 0 0 0 0 0 1
## 531 0 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0 0 0
## 1300 1 0 0 1 3 0 0 0 1 0
## 2623 0 0 0 3 0 0 0 0 1 0
## T.number T.offer T.offic T.one T.onlin T.open T.oper T.opportun
## 8 0 0 0 1 1 0 0 0
## 50 0 1 0 0 0 0 0 0
## 80 0 1 0 0 0 0 0 0
## 188 0 0 2 1 0 0 1 0
## 211 0 0 0 0 0 0 0 0
## 257 0 2 3 2 0 2 6 1
## 297 0 0 0 1 0 0 0 0
## 336 1 0 0 0 0 0 0 1
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 1
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 0 1 0 1 0 1
## 2623 0 0 0 0 0 1 0 0
## T.option T.order T.origin T.part T.particip T.peopl T.per T.person
## 8 0 2 0 0 0 1 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 6 0 1 0 0 0 0
## 211 0 0 0 0 0 0 0 0
## 257 1 3 1 0 0 0 1 0
## 297 0 0 0 0 0 0 0 0
## 336 0 1 0 1 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 3 0 0 1 0 0 0 0
## 2623 2 0 0 0 3 0 0 0
## T.phone T.place T.plan T.pleas T.point T.posit T.possibl T.power
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 2 0 1 0 1 1 0
## 211 0 0 0 1 0 0 1 0
## 257 0 0 6 0 0 1 0 0
## 297 0 0 0 1 0 0 0 0
## 336 1 0 0 1 0 0 0 0
## 531 0 0 0 0 0 0 0 1
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 0 8 1 0 0 0
## 2623 0 0 0 1 0 0 0 0
## T.present T.price T.problem T.process T.product T.program T.project
## 8 0 0 0 0 0 0 0
## 50 0 0 0 0 1 0 0
## 80 0 0 0 0 0 0 0
## 188 0 1 2 0 1 0 0
## 211 0 0 0 0 0 0 0
## 257 1 4 0 4 5 1 5
## 297 0 0 0 0 0 0 0
## 336 0 0 0 0 0 0 1
## 531 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0
## 1300 0 0 0 0 2 2 0
## 2623 0 0 0 4 0 0 0
## T.provid T.public T.put T.question T.rate T.read T.real T.realli
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 0 2 0 0 0
## 211 0 0 0 0 0 0 0 0
## 257 5 5 0 0 0 0 0 0
## 297 0 0 0 0 0 0 0 0
## 336 4 0 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 1 1 0 0 0 0 2 0
## 2623 2 0 0 1 0 0 0 0
## T.receiv T.recent T.regard T.relat T.remov T.repli T.report T.request
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 1 0 0 0 0 0 0 0
## 211 0 0 0 0 0 0 0 0
## 257 4 2 0 2 0 0 3 0
## 297 1 0 2 0 0 1 0 0
## 336 0 0 0 0 1 1 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 1 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 5 0 0 0 0 0
## 2623 1 0 1 0 0 0 0 4
## T.requir T.research T.resourc T.respond T.respons T.result T.resum
## 8 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 0
## 211 0 0 0 0 0 0 0
## 257 4 0 1 0 1 3 0
## 297 0 0 0 0 0 0 0
## 336 1 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0
## 1300 0 3 0 1 1 0 0
## 2623 0 0 0 0 1 0 0
## T.return T.review T.right T.risk T.robert T.run T.say T.schedul
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 0 0
## 211 0 0 0 0 0 0 0 0
## 257 0 0 0 1 0 0 0 1
## 297 0 0 0 0 0 0 0 0
## 336 1 0 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 1 0 0 1 0 0 0 0
## 2623 0 4 0 0 1 0 0 0
## T.school T.secur T.see T.send T.sent T.servic T.set T.sever T.shall
## 8 0 1 0 0 0 0 0 0 0
## 50 0 0 1 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 2 0 0
## 211 0 0 1 0 0 0 0 0 0
## 257 14 4 0 0 0 4 0 0 0
## 297 0 0 0 0 0 0 0 0 0
## 336 0 0 0 0 0 1 0 0 0
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0 0
## 1300 0 0 7 0 0 0 0 0 4
## 2623 0 0 0 0 0 1 0 0 0
## T.shirley T.short T.sinc T.sincer T.site T.softwar T.soon T.sorri
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 1 0
## 211 0 0 0 0 0 0 0 0
## 257 0 0 2 0 3 0 0 0
## 297 0 0 1 1 0 0 0 0
## 336 0 0 0 0 0 0 0 0
## 531 0 0 0 0 1 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 0 0 0 0 1 1
## 2623 1 0 0 0 0 0 0 0
## T.special T.specif T.start T.state T.still T.stinson T.student
## 8 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0
## 188 0 0 0 0 0 0 0
## 211 0 0 0 0 0 0 0
## 257 1 2 1 11 0 0 0
## 297 0 0 0 0 0 0 0
## 336 0 0 0 3 0 0 0
## 531 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0
## 1300 0 0 0 1 0 1 0
## 2623 0 1 0 0 0 0 0
## T.subject T.success T.suggest T.support T.sure T.system T.take T.talk
## 8 1 0 0 0 0 0 0 0
## 50 1 0 0 0 0 0 1 0
## 80 1 0 0 0 0 0 1 0
## 188 1 0 0 0 1 9 0 0
## 211 1 0 0 1 0 0 0 0
## 257 2 3 0 1 0 0 0 0
## 297 1 0 0 0 0 0 0 0
## 336 1 0 0 0 0 0 0 0
## 531 1 0 0 0 0 0 0 0
## 714 1 0 0 0 0 0 1 0
## 716 1 0 0 0 0 0 0 0
## 1300 12 0 0 0 0 0 0 2
## 2623 1 1 0 0 0 0 0 0
## T.team T.term T.thank T.thing T.think T.thought T.thursday T.time
## 8 0 0 0 0 0 0 0 0
## 50 0 0 1 0 0 0 0 0
## 80 0 0 1 0 0 0 0 0
## 188 0 0 0 0 0 0 0 3
## 211 0 0 1 0 0 0 0 0
## 257 2 2 0 0 0 0 0 3
## 297 0 0 0 0 0 0 0 0
## 336 0 0 1 0 0 0 0 1
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 10 0 1 0 0 6
## 2623 0 0 1 0 0 0 0 1
## T.today T.togeth T.trade T.tri T.tuesday T.two T.type T.understand
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 1 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 2 0 0 0 0 0 0 1
## 211 0 0 0 0 0 0 0 0
## 257 0 0 0 0 0 2 1 0
## 297 0 0 0 0 0 0 0 0
## 336 2 0 0 0 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 1 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 1 2 1 0 0 0
## 2623 0 0 0 0 0 0 0 0
## T.unit T.univers T.updat T.use T.valu T.version T.vinc T.visit
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 1 0 0 0 0
## 211 0 0 0 0 0 0 0 0
## 257 0 1 2 3 0 0 0 0
## 297 2 0 0 0 0 0 0 0
## 336 0 0 0 2 0 0 0 0
## 531 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0
## 1300 0 0 0 3 0 0 21 0
## 2623 0 0 0 0 0 0 0 0
## T.vkamin T.want T.way T.web T.websit T.wednesday T.week T.well T.will
## 8 0 0 1 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0 0
## 80 0 1 0 0 0 0 1 0 0
## 188 0 0 0 3 0 0 0 0 3
## 211 0 1 2 0 1 0 0 0 0
## 257 0 0 0 0 2 0 1 2 7
## 297 0 0 0 0 0 0 0 0 1
## 336 0 0 0 0 0 0 0 1 0
## 531 0 0 0 0 0 0 0 0 0
## 714 0 0 0 0 0 0 0 0 0
## 716 0 0 0 0 0 0 0 0 0
## 1300 1 0 0 0 0 1 1 2 1
## 2623 0 0 0 0 0 0 0 0 2
## T.wish T.within T.without T.work T.write T.www T.year T.has.http
## 8 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0
## 80 0 0 0 0 0 0 0 0
## 188 0 0 0 3 0 0 0 0
## 211 0 0 0 1 0 2 0 1
## 257 0 1 2 1 0 1 7 1
## 297 1 0 0 0 1 0 1 0
## 336 0 0 0 0 0 0 3 1
## 531 0 0 0 0 0 0 0 0
## 714 2 0 0 0 0 0 0 0
## 716 0 0 0 0 0 1 0 1
## 1300 0 0 0 3 0 0 1 0
## 2623 0 0 0 0 0 0 2 1
## T.num.chars T.num.words T.num.words.unq T.num.chars.log
## 8 347 33 31 5.852202
## 50 935 96 61 6.841615
## 80 783 74 65 6.664409
## 188 3253 309 170 8.087640
## 211 762 67 48 6.637258
## 257 12615 1171 521 9.442721
## 297 1672 142 114 7.422374
## 336 2122 203 157 7.660585
## 531 54 6 6 4.007333
## 714 193 19 15 5.267858
## 716 547 64 61 6.306275
## 1300 7005 612 241 8.854522
## 2623 2407 248 142 7.786552
## T.num.words.log T.num.words.unq.log spam.fctr.predict.Final.rf.prob
## 8 3.526361 3.465736 1.000
## 50 4.574711 4.127134 0.352
## 80 4.317488 4.189655 0.040
## 188 5.736572 5.141664 0.292
## 211 4.219508 3.891820 0.344
## 257 7.066467 6.257668 0.332
## 297 4.962845 4.744932 0.334
## 336 5.318120 5.062595 0.344
## 531 1.945910 1.945910 0.448
## 714 2.995732 2.772589 0.906
## 716 4.174387 4.127134 0.860
## 1300 6.418365 5.488938 0.038
## 2623 5.517453 4.962845 0.000
## spam.fctr.predict.Final.rf spam.fctr.predict.Final.rf.accurate .label
## 8 Y TRUE .8
## 50 N FALSE .50
## 80 N FALSE .80
## 188 N FALSE .188
## 211 N FALSE .211
## 257 N FALSE .257
## 297 N FALSE .297
## 336 N FALSE .336
## 531 Y FALSE .531
## 714 Y FALSE .714
## 716 Y FALSE .716
## 1300 N TRUE .1300
## 2623 N TRUE .2623
# Append "data.new.prediction" to glb_analytics_avl_objs, then pass the
# updated vector to replay.petrisim() as replay.trans.
# The global is updated in its own statement instead of inside the call:
# R evaluates function arguments lazily, so an assignment embedded in an
# argument only runs if and when the callee forces that argument.
glb_analytics_avl_objs <- c(glb_analytics_avl_objs, "data.new.prediction")
tmp_replay_lst <- replay.petrisim(pn = glb_analytics_pn,
                                  replay.trans = glb_analytics_avl_objs,
                                  flip_coord = TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
## 3.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: data.training.all.prediction
## 4.0000 5 0 1 1 1
## 4.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: model.final
## 5.0000 4 0 0 2 1
## 6.0000 6 0 0 1 2
# Plot the "pn" element of the replay result with ggplot.petrinet(), add
# coord_flip() to the plot, and print() it explicitly so it is rendered.
print(ggplot.petrinet(tmp_replay_lst[["pn"]]) + coord_flip())
Null Hypothesis (\(\sf{H_{0}}\)): mpg is not impacted by am_fctr.
The variance by am_fctr appears to be independent.
#{r q1, cache=FALSE}
# print(t.test(subset(cars_df, am_fctr == "automatic")$mpg,
#              subset(cars_df, am_fctr == "manual")$mpg,
#              var.equal=FALSE)$conf)
# We reject the null hypothesis, i.e., we have evidence to conclude that am_fctr impacts mpg (95% confidence). Manual transmission is better for miles per gallon versus automatic transmission.
## chunk_label chunk_step_major chunk_step_minor elapsed
## 11 fit.models 5 2 1302.645
## 13 fit.data.training.all 6 1 1556.761
## 10 fit.models 5 1 173.248
## 7 select_features 4 0 54.204
## 9 fit.models 5 0 96.019
## 12 fit.data.training.all 6 0 1318.755
## 14 predict.data.new 7 0 1565.909
## 6 extract.features 3 0 7.282
## 2 cleanse_data 2 0 2.315
## 4 manage_missing_data 2 2 4.095
## 8 remove_correlated_features 4 1 55.580
## 5 encodeORretype.data 2 3 4.383
## 3 inspectORexplore.data 2 1 2.428
## 1 import_data 1 0 0.002
## elapsed_diff
## 11 1129.397
## 13 238.006
## 10 77.229
## 7 46.922
## 9 40.439
## 12 16.110
## 14 9.148
## 6 2.899
## 2 2.313
## 4 1.667
## 8 1.376
## 5 0.288
## 3 0.113
## 1 0.000
## [1] "Total Elapsed Time: 1,565.909 secs"
## R version 3.1.3 (2015-03-09)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.3 (Yosemite)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] tcltk grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] randomForest_4.6-10 rpart.plot_1.5.2 rpart_4.1-9
## [4] ROCR_1.0-7 gplots_2.16.0 caret_6.0-41
## [7] lattice_0.20-31 tm_0.6 NLP_0.1-6
## [10] sqldf_0.4-10 RSQLite_1.0.0 DBI_0.3.1
## [13] gsubfn_0.6-6 proto_0.3-10 reshape2_1.4.1
## [16] plyr_1.8.1 caTools_1.17.1 doBy_4.5-13
## [19] survival_2.38-1 ggplot2_1.0.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 BradleyTerry2_1.0-6 brglm_0.5-9
## [4] car_2.0-25 chron_2.3-45 class_7.3-12
## [7] codetools_0.2-11 colorspace_1.2-6 compiler_3.1.3
## [10] digest_0.6.8 e1071_1.6-4 evaluate_0.5.5
## [13] foreach_1.4.2 formatR_1.1 gdata_2.13.3
## [16] gtable_0.1.2 gtools_3.4.1 htmltools_0.2.6
## [19] iterators_1.0.7 KernSmooth_2.23-14 knitr_1.9
## [22] labeling_0.3 lme4_1.1-7 MASS_7.3-40
## [25] Matrix_1.2-0 mgcv_1.8-6 minqa_1.2.4
## [28] munsell_0.4.2 nlme_3.1-120 nloptr_1.0.4
## [31] nnet_7.3-9 parallel_3.1.3 pbkrtest_0.4-2
## [34] quantreg_5.11 RColorBrewer_1.1-2 Rcpp_0.11.5
## [37] rmarkdown_0.5.1 scales_0.2.4 slam_0.1-32
## [40] SparseM_1.6 splines_3.1.3 stringr_0.6.2
## [43] tools_3.1.3 yaml_2.1.13